Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
all:
|
||||
$(MAKE) -C simx
|
||||
$(MAKE) -C rtlsim
|
||||
$(MAKE) -C vlsim
|
||||
$(MAKE) -C opaesim
|
||||
|
||||
clean:
|
||||
$(MAKE) -C simx clean
|
||||
$(MAKE) -C rtlsim clean
|
||||
$(MAKE) -C vlsim clean
|
||||
$(MAKE) -C opaesim clean
|
||||
@@ -1,7 +1,19 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
|
||||
constexpr uint32_t count_leading_zeros(uint32_t value) {
|
||||
@@ -77,5 +89,15 @@ T sext(const T& word, uint32_t width) {
|
||||
if (width == (sizeof(T) * 8))
|
||||
return word;
|
||||
T mask((static_cast<T>(1) << width) - 1);
|
||||
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
|
||||
}
|
||||
return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : (word & mask);
|
||||
}
|
||||
|
||||
/// Zero-extends the low `width` bits of `word`: the low `width` bits are
/// returned unchanged and every bit above them is cleared.
///
/// @tparam T     integer type carrying the value (defaults to uint32_t)
/// @param word   value whose low-order bits are kept
/// @param width  number of low-order bits to keep, in [1, sizeof(T) * 8]
/// @return `word` masked down to its low `width` bits
template <typename T = uint32_t>
T zext(const T& word, uint32_t width) {
  // A single-bit field is a legitimate zero-extension source; only an empty
  // field (width == 0) is rejected.
  assert(width >= 1);
  assert(width <= (sizeof(T) * 8));
  // Building the mask would shift by the full bit width here, so the
  // full-width case returns the word untouched before the mask is computed.
  if (width == (sizeof(T) * 8))
    return word;
  const T mask((static_cast<T>(1) << width) - 1);
  return word & mask;
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mem.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
@@ -20,8 +33,9 @@ RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize)
|
||||
contents_.push_back(input.get());
|
||||
} while (input);
|
||||
|
||||
while (contents_.size() & (wordSize-1))
|
||||
while (contents_.size() & (wordSize-1)) {
|
||||
contents_.push_back(0x00);
|
||||
}
|
||||
}
|
||||
|
||||
RamMemDevice::RamMemDevice(uint64_t size, uint32_t wordSize)
|
||||
@@ -29,7 +43,7 @@ RamMemDevice::RamMemDevice(uint64_t size, uint32_t wordSize)
|
||||
, wordSize_(wordSize)
|
||||
{}
|
||||
|
||||
void RamMemDevice::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
@@ -44,7 +58,7 @@ void RamMemDevice::read(void *data, uint64_t addr, uint64_t size) {
|
||||
}
|
||||
}
|
||||
|
||||
void RamMemDevice::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
auto addr_end = addr + size;
|
||||
if ((addr & (wordSize_-1))
|
||||
|| (addr_end & (wordSize_-1))
|
||||
@@ -68,26 +82,26 @@ void RomMemDevice::write(const void* /*data*/, uint64_t /*addr*/, uint64_t /*siz
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool MemoryUnit::ADecoder::lookup(uint64_t a, uint32_t wordSize, mem_accessor_t* ma) {
|
||||
uint64_t e = a + (wordSize - 1);
|
||||
assert(e >= a);
|
||||
bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor_t* ma) {
|
||||
uint64_t end = addr + (wordSize - 1);
|
||||
assert(end >= addr);
|
||||
for (auto iter = entries_.rbegin(), iterE = entries_.rend(); iter != iterE; ++iter) {
|
||||
if (a >= iter->start && e <= iter->end) {
|
||||
if (addr >= iter->start && end <= iter->end) {
|
||||
ma->md = iter->md;
|
||||
ma->addr = a - iter->start;
|
||||
ma->addr = addr - iter->start;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::map(uint64_t a, uint64_t e, MemDevice &m) {
|
||||
assert(e >= a);
|
||||
entry_t entry{&m, a, e};
|
||||
// Registers device `md` for the address range [start, end]. `end` is
// inclusive: lookup() matches an access whose last byte is <= the entry's end.
// lookup() walks the entries from newest to oldest, so a later mapping is
// found before an earlier one that covers the same addresses.
void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
  assert(end >= start);
  entries_.push_back(entry_t{&md, start, end});
}
|
||||
|
||||
void MemoryUnit::ADecoder::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
@@ -96,7 +110,7 @@ void MemoryUnit::ADecoder::read(void *data, uint64_t addr, uint64_t size) {
|
||||
ma.md->read(data, ma.addr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::ADecoder::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
mem_accessor_t ma;
|
||||
if (!this->lookup(addr, size, &ma)) {
|
||||
std::cout << "lookup of 0x" << std::hex << addr << " failed.\n";
|
||||
@@ -107,11 +121,11 @@ void MemoryUnit::ADecoder::write(const void *data, uint64_t addr, uint64_t size)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MemoryUnit::MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm)
|
||||
MemoryUnit::MemoryUnit(uint64_t pageSize)
|
||||
: pageSize_(pageSize)
|
||||
, addrBytes_(addrBytes)
|
||||
, disableVM_(disableVm) {
|
||||
if (!disableVm) {
|
||||
, enableVM_(pageSize != 0)
|
||||
, amo_reservation_({0x0, false}) {
|
||||
if (pageSize != 0) {
|
||||
tlb_[0] = TLBEntry(0, 077);
|
||||
}
|
||||
}
|
||||
@@ -133,30 +147,38 @@ MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) {
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryUnit::read(void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 8 : 1;
|
||||
if (enableVM_) {
|
||||
TLBEntry t = this->tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
} else {
|
||||
pAddr = addr;
|
||||
}
|
||||
return pAddr;
|
||||
}
|
||||
|
||||
void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1);
|
||||
return decoder_.read(data, pAddr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::write(const void *data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr;
|
||||
if (disableVM_) {
|
||||
pAddr = addr;
|
||||
} else {
|
||||
uint32_t flagMask = sup ? 16 : 2;
|
||||
TLBEntry t = tlbLookup(addr, flagMask);
|
||||
pAddr = t.pfn * pageSize_ + addr % pageSize_;
|
||||
}
|
||||
void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1);
|
||||
decoder_.write(data, pAddr, size);
|
||||
amo_reservation_.valid = false;
|
||||
}
|
||||
|
||||
void MemoryUnit::amo_reserve(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
amo_reservation_.addr = pAddr;
|
||||
amo_reservation_.valid = true;
|
||||
}
|
||||
|
||||
bool MemoryUnit::amo_check(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
|
||||
}
|
||||
// Installs (or overwrites) the TLB entry for the page containing `virt`.
// Both addresses are reduced to page numbers by dividing by pageSize_; the
// entry maps the virtual page number to the physical page number with `flags`.
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) {
  const uint64_t virt_page = virt / pageSize_;
  const uint64_t phys_page = phys / pageSize_;
  tlb_[virt_page] = TLBEntry(phys_page, flags);
}
|
||||
@@ -168,12 +190,14 @@ void MemoryUnit::tlbRm(uint64_t va) {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
RAM::RAM(uint32_t page_size)
|
||||
: size_(0)
|
||||
RAM::RAM(uint32_t page_size, uint64_t capacity)
|
||||
: capacity_(capacity)
|
||||
, page_bits_(log2ceil(page_size))
|
||||
, last_page_(nullptr)
|
||||
, last_page_index_(0) {
|
||||
assert(ispow2(page_size));
|
||||
assert(0 == capacity || ispow2(capacity));
|
||||
assert(0 == (capacity % page_size));
|
||||
}
|
||||
|
||||
RAM::~RAM() {
|
||||
@@ -191,6 +215,9 @@ uint64_t RAM::size() const {
|
||||
}
|
||||
|
||||
uint8_t *RAM::get(uint64_t address) const {
|
||||
if (capacity_ != 0 && address >= capacity_) {
|
||||
throw OutOfRange();
|
||||
}
|
||||
uint32_t page_size = 1 << page_bits_;
|
||||
uint32_t page_offset = address & (page_size - 1);
|
||||
uint64_t page_index = address >> page_bits_;
|
||||
@@ -218,14 +245,14 @@ uint8_t *RAM::get(uint64_t address) const {
|
||||
return page + page_offset;
|
||||
}
|
||||
|
||||
void RAM::read(void *data, uint64_t addr, uint64_t size) {
|
||||
void RAM::read(void* data, uint64_t addr, uint64_t size) {
|
||||
uint8_t* d = (uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
d[i] = *this->get(addr + i);
|
||||
}
|
||||
}
|
||||
|
||||
void RAM::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
void RAM::write(const void* data, uint64_t addr, uint64_t size) {
|
||||
const uint8_t* d = (const uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
*this->get(addr + i) = d[i];
|
||||
@@ -236,6 +263,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
|
||||
std::ifstream ifs(filename);
|
||||
if (!ifs) {
|
||||
std::cout << "error: " << filename << " not found" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
ifs.seekg(0, ifs.end);
|
||||
@@ -268,6 +296,7 @@ void RAM::loadHexImage(const char* filename) {
|
||||
std::ifstream ifs(filename);
|
||||
if (!ifs) {
|
||||
std::cout << "error: " << filename << " not found" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
ifs.seekg(0, ifs.end);
|
||||
@@ -313,4 +342,4 @@ void RAM::loadHexImage(const char* filename) {
|
||||
++line;
|
||||
--size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
@@ -7,13 +20,14 @@
|
||||
|
||||
namespace vortex {
|
||||
struct BadAddress {};
|
||||
struct OutOfRange {};
|
||||
|
||||
class MemDevice {
|
||||
public:
|
||||
virtual ~MemDevice() {}
|
||||
virtual uint64_t size() const = 0;
|
||||
virtual void read(void *data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void write(const void *data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void read(void* data, uint64_t addr, uint64_t size) = 0;
|
||||
virtual void write(const void* data, uint64_t addr, uint64_t size) = 0;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -21,11 +35,11 @@ public:
|
||||
class RamMemDevice : public MemDevice {
|
||||
public:
|
||||
RamMemDevice(uint64_t size, uint32_t wordSize);
|
||||
RamMemDevice(const char *filename, uint32_t wordSize);
|
||||
RamMemDevice(const char* filename, uint32_t wordSize);
|
||||
~RamMemDevice() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void read(void* data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
virtual uint64_t size() const {
|
||||
return contents_.size();
|
||||
@@ -50,7 +64,7 @@ public:
|
||||
|
||||
~RomMemDevice();
|
||||
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -63,47 +77,56 @@ public:
|
||||
: faultAddr(a)
|
||||
, notFound(nf)
|
||||
{}
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
};
|
||||
|
||||
MemoryUnit(uint64_t pageSize, uint64_t addrBytes, bool disableVm = false);
|
||||
MemoryUnit(uint64_t pageSize = 0);
|
||||
|
||||
void attach(MemDevice &m, uint64_t start, uint64_t end);
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void *data, uint64_t addr, uint64_t size, bool sup);
|
||||
void read(void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
|
||||
void amo_reserve(uint64_t addr);
|
||||
bool amo_check(uint64_t addr);
|
||||
|
||||
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags);
|
||||
void tlbRm(uint64_t va);
|
||||
void tlbRm(uint64_t vaddr);
|
||||
void tlbFlush() {
|
||||
tlb_.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
struct amo_reservation_t {
|
||||
uint64_t addr;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
class ADecoder {
|
||||
public:
|
||||
ADecoder() {}
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size);
|
||||
void write(const void *data, uint64_t addr, uint64_t size);
|
||||
void read(void* data, uint64_t addr, uint64_t size);
|
||||
void write(const void* data, uint64_t addr, uint64_t size);
|
||||
|
||||
void map(uint64_t start, uint64_t end, MemDevice &md);
|
||||
|
||||
private:
|
||||
|
||||
struct mem_accessor_t {
|
||||
MemDevice* md;
|
||||
uint64_t addr;
|
||||
MemDevice* md;
|
||||
uint64_t addr;
|
||||
};
|
||||
|
||||
struct entry_t {
|
||||
MemDevice *md;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
MemDevice* md;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
};
|
||||
|
||||
bool lookup(uint64_t a, uint32_t wordSize, mem_accessor_t*);
|
||||
bool lookup(uint64_t addr, uint32_t wordSize, mem_accessor_t*);
|
||||
|
||||
std::vector<entry_t> entries_;
|
||||
};
|
||||
@@ -120,11 +143,14 @@ private:
|
||||
|
||||
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
|
||||
|
||||
uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask);
|
||||
|
||||
std::unordered_map<uint64_t, TLBEntry> tlb_;
|
||||
uint64_t pageSize_;
|
||||
uint64_t addrBytes_;
|
||||
ADecoder decoder_;
|
||||
bool disableVM_;
|
||||
uint64_t pageSize_;
|
||||
ADecoder decoder_;
|
||||
bool enableVM_;
|
||||
|
||||
amo_reservation_t amo_reservation_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -132,15 +158,15 @@ private:
|
||||
class RAM : public MemDevice {
|
||||
public:
|
||||
|
||||
RAM(uint32_t page_size);
|
||||
RAM(uint32_t page_size, uint64_t capacity = 0);
|
||||
~RAM();
|
||||
|
||||
void clear();
|
||||
|
||||
uint64_t size() const override;
|
||||
|
||||
void read(void *data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void *data, uint64_t addr, uint64_t size) override;
|
||||
void read(void* data, uint64_t addr, uint64_t size) override;
|
||||
void write(const void* data, uint64_t addr, uint64_t size) override;
|
||||
|
||||
void loadBinImage(const char* filename, uint64_t destination);
|
||||
void loadHexImage(const char* filename);
|
||||
@@ -157,11 +183,11 @@ private:
|
||||
|
||||
uint8_t *get(uint64_t address) const;
|
||||
|
||||
uint64_t size_;
|
||||
uint64_t capacity_;
|
||||
uint32_t page_bits_;
|
||||
mutable std::unordered_map<uint64_t, uint8_t*> pages_;
|
||||
mutable uint8_t* last_page_;
|
||||
mutable uint64_t last_page_index_;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stack>
|
||||
@@ -18,8 +31,9 @@ public:
|
||||
void* allocate() {
|
||||
void* mem;
|
||||
if (!free_list_.empty()) {
|
||||
mem = static_cast<void*>(free_list_.top());
|
||||
auto entry = free_list_.top();
|
||||
free_list_.pop();
|
||||
mem = static_cast<void*>(entry);
|
||||
} else {
|
||||
mem = ::operator new(sizeof(T));
|
||||
}
|
||||
@@ -36,12 +50,13 @@ public:
|
||||
|
||||
void flush() {
|
||||
while (!free_list_.empty()) {
|
||||
::operator delete(free_list_.top());
|
||||
auto entry = free_list_.top();
|
||||
free_list_.pop();
|
||||
::operator delete(entry);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::stack<void*> free_list_;
|
||||
std::stack<T*> free_list_;
|
||||
uint32_t max_size_;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "rvfloats.h"
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -16,12 +29,9 @@ inline float64_t to_float64_t(uint64_t x) { return float64_t{x}; }
|
||||
inline uint32_t from_float32_t(float32_t x) { return uint32_t(x.v); }
|
||||
inline uint64_t from_float64_t(float64_t x) { return uint64_t(x.v); }
|
||||
|
||||
inline uint32_t get_fflags() {
|
||||
uint32_t fflags = softfloat_exceptionFlags;
|
||||
if (fflags) {
|
||||
softfloat_exceptionFlags = 0;
|
||||
}
|
||||
return fflags;
|
||||
// Resets the SoftFloat global state ahead of one operation: installs `frm`
// as the rounding mode and clears the exception flags, so a read of
// softfloat_exceptionFlags after the operation reflects that operation only.
inline void rv_init(uint32_t frm) {
  softfloat_roundingMode = frm;
  softfloat_exceptionFlags = 0;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -29,289 +39,296 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_add(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_add(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_sub(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_sub(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_mul(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_mul(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_div(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_div(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_sqrt(to_float32_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_sqrt(to_float64_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_i32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_i32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_ui32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_ui32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_i64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_i64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f32_to_ui64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = f64_to_ui64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = i64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
rv_init(frm);
|
||||
auto r = ui64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_lt(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_lt(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_le(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_le(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f32_eq(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
rv_init(0);
|
||||
auto r = f64_eq(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -324,12 +341,13 @@ uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
@@ -342,12 +360,13 @@ uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -360,12 +379,13 @@ uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
rv_init(0);
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
@@ -378,7 +398,7 @@ uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
#ifndef RVFLOATS_H
|
||||
#define RVFLOATS_H
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
@@ -78,5 +90,3 @@ uint64_t rv_ftod(uint32_t a);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
@@ -84,33 +97,39 @@ public:
|
||||
}
|
||||
|
||||
uint64_t pop() {
|
||||
auto cycle = queue_.front().cycle;
|
||||
auto cycles = queue_.front().cycles;
|
||||
queue_.pop();
|
||||
return cycle;
|
||||
return cycles;
|
||||
}
|
||||
|
||||
void tx_callback(const TxCallback& callback) {
|
||||
tx_cb_ = callback;
|
||||
}
|
||||
|
||||
uint64_t arrival_time() const {
|
||||
if (queue_.empty())
|
||||
return 0;
|
||||
return queue_.front().cycles;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct timed_pkt_t {
|
||||
Pkt pkt;
|
||||
uint64_t cycle;
|
||||
uint64_t cycles;
|
||||
};
|
||||
|
||||
std::queue<timed_pkt_t> queue_;
|
||||
SimPort* peer_;
|
||||
TxCallback tx_cb_;
|
||||
|
||||
void push(const Pkt& data, uint64_t cycle) {
|
||||
void push(const Pkt& data, uint64_t cycles) {
|
||||
if (tx_cb_) {
|
||||
tx_cb_(data, cycle);
|
||||
tx_cb_(data, cycles);
|
||||
}
|
||||
if (peer_) {
|
||||
peer_->push(data, cycle);
|
||||
peer_->push(data, cycles);
|
||||
} else {
|
||||
queue_.push({data, cycle});
|
||||
queue_.push({data, cycles});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,14 +148,14 @@ public:
|
||||
|
||||
virtual void fire() const = 0;
|
||||
|
||||
uint64_t time() const {
|
||||
return time_;
|
||||
uint64_t cycles() const {
|
||||
return cycles_;
|
||||
}
|
||||
|
||||
protected:
|
||||
SimEventBase(uint64_t time) : time_(time) {}
|
||||
SimEventBase(uint64_t cycles) : cycles_(cycles) {}
|
||||
|
||||
uint64_t time_;
|
||||
uint64_t cycles_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -150,8 +169,8 @@ public:
|
||||
|
||||
typedef std::function<void (const Pkt&)> Func;
|
||||
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t time)
|
||||
: SimEventBase(time)
|
||||
SimCallEvent(const Func& func, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, func_(func)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
@@ -180,11 +199,11 @@ template <typename Pkt>
|
||||
class SimPortEvent : public SimEventBase {
|
||||
public:
|
||||
void fire() const override {
|
||||
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, time_);
|
||||
const_cast<SimPort<Pkt>*>(port_)->push(pkt_, cycles_);
|
||||
}
|
||||
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t time)
|
||||
: SimEventBase(time)
|
||||
SimPortEvent(const SimPort<Pkt>* port, const Pkt& pkt, uint64_t cycles)
|
||||
: SimEventBase(cycles)
|
||||
, port_(port)
|
||||
, pkt_(pkt)
|
||||
{}
|
||||
@@ -330,7 +349,7 @@ public:
|
||||
auto evt_it_end = events_.end();
|
||||
while (evt_it != evt_it_end) {
|
||||
auto& event = *evt_it;
|
||||
if (cycles_ >= event->time()) {
|
||||
if (cycles_ >= event->cycles()) {
|
||||
event->fire();
|
||||
evt_it = events_.erase(evt_it);
|
||||
} else {
|
||||
@@ -395,5 +414,5 @@ void SimPort<Pkt>::send(const Pkt& pkt, uint64_t delay) const {
|
||||
reinterpret_cast<const SimPort<Pkt>*>(peer_)->send(pkt, delay);
|
||||
} else {
|
||||
SimPlatform::instance().schedule(this, pkt, delay);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
78
sim/common/stringutil.h
Normal file
78
sim/common/stringutil.h
Normal file
@@ -0,0 +1,78 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
// Lightweight view over a raw byte buffer that can be written to an output
// stream as one hexadecimal number: bytes are emitted from the highest address
// down to the lowest, two hex digits each.
// The buffer is referenced, not copied, so it must outlive this object.
class ByteStream : public std::istream {
public:
  // buf:  start of the bytes to print.
  // size: number of bytes to print.
  // std::istream has no standard default constructor (only some library
  // implementations offer one as an extension), so the base is initialised
  // explicitly with a null stream buffer.
  ByteStream(const void *buf, std::size_t size)
    : std::istream(nullptr)
    , buf_(buf)
    , size_(size)
  {}

  // Writes the buffer as hex digits and restores the caller's formatting
  // state (flags, width, fill) before returning the stream.
  friend std::ostream& operator<<(std::ostream& os, const ByteStream& obj) {
    const auto oldflags = os.flags();
    const auto oldwidth = os.width();
    const auto oldfill  = os.fill();
    const auto* bytes = static_cast<const unsigned char*>(obj.buf_);
    // walk from the last byte to the first
    for (std::size_t i = obj.size_; i-- > 0;) {
      // widen to int so the value is printed as a number, not a character
      os << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(bytes[i]);
    }
    os.fill(oldfill);
    os.width(oldwidth);
    os.flags(oldflags);
    return os;
  }

private:
  const void *buf_;
  std::size_t size_;
};
|
||||
|
||||
// Stream buffer filter that prefixes every non-empty line with a fixed number
// of spaces before forwarding the characters to a destination buffer.
// When constructed from an std::ostream it installs itself as that stream's
// buffer and restores the original buffer on destruction.
class IndentStream : public std::streambuf {
public:
  // Filters into `dest` directly; the caller attaches this buffer to a stream.
  // indent: number of spaces inserted at the start of each non-empty line.
  explicit IndentStream(std::streambuf* dest, int indent = 4)
    : dest_(dest)
    , isBeginLine_(true)
    , indent_(indent, ' ')
    , owner_(nullptr)
  {}

  // Hooks into `dest`: everything written to `dest` is indented for as long as
  // this object is alive.
  explicit IndentStream(std::ostream& dest, int indent = 4)
    : dest_(dest.rdbuf())
    , isBeginLine_(true)
    , indent_(indent, ' ')
    , owner_(&dest) {
    owner_->rdbuf(this);
  }

  // Gives the owning stream (if any) its original buffer back.
  ~IndentStream() override {
    if (owner_)
      owner_->rdbuf(dest_);
  }

protected:
  // Invoked for every character because no put area is set up.
  int overflow(int ch) override {
    if (traits_type::eq_int_type(ch, traits_type::eof())) {
      // EOF is a sentinel, not data: emit nothing and just report whether the
      // destination could be synchronised.
      return (dest_->pubsync() == 0) ? traits_type::not_eof(ch) : traits_type::eof();
    }
    // blank lines are left unindented
    if (isBeginLine_ && ch != '\n') {
      dest_->sputn(indent_.data(), indent_.size());
    }
    isBeginLine_ = (ch == '\n');
    return dest_->sputc(traits_type::to_char_type(ch));
  }

  // Forward flush requests so flushing the owner also flushes the destination.
  int sync() override {
    return dest_->pubsync();
  }

private:
  std::streambuf* dest_;   // buffer that receives the indented output
  bool isBeginLine_;       // true when the next character starts a new line
  std::string indent_;     // the run of spaces inserted at line starts
  std::ostream* owner_;    // stream whose buffer was replaced, or nullptr
};
|
||||
@@ -1,237 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cocogfx/include/fixed.h>
|
||||
#include <bitmanip.h>
|
||||
|
||||
using namespace cocogfx;
|
||||
|
||||
enum class WrapMode {
|
||||
Clamp,
|
||||
Repeat,
|
||||
Mirror,
|
||||
};
|
||||
|
||||
enum class TexFormat {
|
||||
A8R8G8B8,
|
||||
R5G6B5,
|
||||
A1R5G5B5,
|
||||
A4R4G4B4,
|
||||
A8L8,
|
||||
L8,
|
||||
A8,
|
||||
};
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
T Clamp(Fixed<F,T> fx, WrapMode mode) {
|
||||
switch (mode) {
|
||||
case WrapMode::Clamp: return (fx.data() < 0) ? 0 : ((fx.data() > Fixed<F,T>::MASK) ? Fixed<F,T>::MASK : fx.data());
|
||||
case WrapMode::Repeat: return (fx.data() & Fixed<F,T>::MASK);
|
||||
case WrapMode::Mirror: return (bit_get(fx.data(), Fixed<F,T>::FRAC) ? ~fx.data() : fx.data());
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t Stride(TexFormat format) {
|
||||
switch (format) {
|
||||
case TexFormat::A8R8G8B8:
|
||||
return 4;
|
||||
case TexFormat::R5G6B5:
|
||||
case TexFormat::A1R5G5B5:
|
||||
case TexFormat::A4R4G4B4:
|
||||
case TexFormat::A8L8:
|
||||
return 2;
|
||||
case TexFormat::L8:
|
||||
case TexFormat::A8:
|
||||
return 1;
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Unpack8888(TexFormat format,
|
||||
uint32_t texel,
|
||||
uint32_t* lo,
|
||||
uint32_t* hi) {
|
||||
uint32_t r, g, b, a;
|
||||
switch (format) {
|
||||
case TexFormat::A8R8G8B8:
|
||||
r = (texel >> 16) & 0xff;
|
||||
g = (texel >> 8) & 0xff;
|
||||
b = texel & 0xff;
|
||||
a = texel >> 24;
|
||||
break;
|
||||
case TexFormat::R5G6B5:
|
||||
r = ((texel >> 11) << 3) | (texel >> 13);
|
||||
g = ((texel >> 3) & 0xfc) | ((texel >> 9) & 0x3);
|
||||
b = ((texel & 0x1f) << 3) | ((texel & 0x1c) >> 2);
|
||||
a = 0xff;
|
||||
break;
|
||||
case TexFormat::A1R5G5B5:
|
||||
r = ((texel >> 7) & 0xf8) | ((texel << 1) >> 13);
|
||||
g = ((texel >> 2) & 0xf8) | ((texel >> 7) & 7);
|
||||
b = ((texel & 0x1f) << 3) | ((texel & 0x1c) >> 2);
|
||||
a = 0xff * (texel >> 15);
|
||||
break;
|
||||
case TexFormat::A4R4G4B4:
|
||||
r = ((texel >> 4) & 0xf0) | ((texel >> 8) & 0x0f);
|
||||
g = ((texel & 0xf0) >> 0) | ((texel & 0xf0) >> 4);
|
||||
b = ((texel & 0x0f) << 4) | ((texel & 0x0f) >> 0);
|
||||
a = ((texel >> 8) & 0xf0) | (texel >> 12);
|
||||
break;
|
||||
case TexFormat::A8L8:
|
||||
r = texel & 0xff;
|
||||
g = r;
|
||||
b = r;
|
||||
a = texel >> 8;
|
||||
break;
|
||||
case TexFormat::L8:
|
||||
r = texel & 0xff;
|
||||
g = r;
|
||||
b = r;
|
||||
a = 0xff;
|
||||
break;
|
||||
case TexFormat::A8:
|
||||
r = 0xff;
|
||||
g = 0xff;
|
||||
b = 0xff;
|
||||
a = texel & 0xff;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
*lo = (r << 16) + b;
|
||||
*hi = (a << 16) + g;
|
||||
}
|
||||
|
||||
inline void Unpack8888(uint32_t texel, uint32_t* lo, uint32_t* hi) {
|
||||
*lo = texel & 0x00ff00ff;
|
||||
*hi = (texel >> 8) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
inline uint32_t Pack8888(uint32_t lo, uint32_t hi) {
|
||||
return (hi << 8) | lo;
|
||||
}
|
||||
|
||||
inline uint32_t Lerp8888(uint32_t a, uint32_t b, uint32_t f) {
|
||||
return (a + (((b - a) * f) >> 8)) & 0x00ff00ff;
|
||||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressLinear(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
WrapMode wrapv,
|
||||
uint32_t* addr00,
|
||||
uint32_t* addr01,
|
||||
uint32_t* addr10,
|
||||
uint32_t* addr11,
|
||||
uint32_t* alpha,
|
||||
uint32_t* beta
|
||||
) {
|
||||
auto delta_x = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_width);
|
||||
auto delta_y = Fixed<F,T>::make(Fixed<F,T>::HALF >> log_height);
|
||||
|
||||
uint32_t u0 = Clamp(fu - delta_x, wrapu);
|
||||
uint32_t u1 = Clamp(fu + delta_x, wrapu);
|
||||
uint32_t v0 = Clamp(fv - delta_y, wrapv);
|
||||
uint32_t v1 = Clamp(fv + delta_y, wrapv);
|
||||
|
||||
uint32_t shift_u = (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t shift_v = (Fixed<F,T>::FRAC - log_height);
|
||||
|
||||
uint32_t x0s = (u0 << 8) >> shift_u;
|
||||
uint32_t y0s = (v0 << 8) >> shift_v;
|
||||
|
||||
uint32_t x0 = x0s >> 8;
|
||||
uint32_t y0 = y0s >> 8;
|
||||
uint32_t x1 = u1 >> shift_u;
|
||||
uint32_t y1 = v1 >> shift_v;
|
||||
|
||||
*addr00 = x0 + (y0 << log_width);
|
||||
*addr01 = x1 + (y0 << log_width);
|
||||
*addr10 = x0 + (y1 << log_width);
|
||||
*addr11 = x1 + (y1 << log_width);
|
||||
|
||||
*alpha = x0s & 0xff;
|
||||
*beta = y0s & 0xff;
|
||||
|
||||
//printf("*** fu=0x%x, fv=0x%x, u0=0x%x, u1=0x%x, v0=0x%x, v1=0x%x, x0=0x%x, x1=0x%x, y0=0x%x, y1=0x%x, addr00=0x%x, addr01=0x%x, addr10=0x%x, addr11=0x%x\n", fu.data(), fv.data(), u0, u1, v0, v1, x0, x1, y0, y1, *addr00, *addr01, *addr10, *addr11);
|
||||
}
|
||||
|
||||
template <uint32_t F, typename T = int32_t>
|
||||
void TexAddressPoint(Fixed<F,T> fu,
|
||||
Fixed<F,T> fv,
|
||||
uint32_t log_width,
|
||||
uint32_t log_height,
|
||||
WrapMode wrapu,
|
||||
WrapMode wrapv,
|
||||
uint32_t* addr
|
||||
) {
|
||||
uint32_t u = Clamp(fu, wrapu);
|
||||
uint32_t v = Clamp(fv, wrapv);
|
||||
|
||||
uint32_t x = u >> (Fixed<F,T>::FRAC - log_width);
|
||||
uint32_t y = v >> (Fixed<F,T>::FRAC - log_height);
|
||||
|
||||
*addr = x + (y << log_width);
|
||||
|
||||
//printf("*** fu=0x%x, fv=0x%x, u=0x%x, v=0x%x, x=0x%x, y=0x%x, addr=0x%x\n", fu.data(), fv.data(), u, v, x, y, *addr);
|
||||
}
|
||||
|
||||
inline uint32_t TexFilterLinear(
|
||||
TexFormat format,
|
||||
uint32_t texel00,
|
||||
uint32_t texel01,
|
||||
uint32_t texel10,
|
||||
uint32_t texel11,
|
||||
uint32_t alpha,
|
||||
uint32_t beta
|
||||
) {
|
||||
uint32_t c01l, c01h;
|
||||
{
|
||||
uint32_t c0l, c0h, c1l, c1h;
|
||||
Unpack8888(format, texel00, &c0l, &c0h);
|
||||
Unpack8888(format, texel01, &c1l, &c1h);
|
||||
c01l = Lerp8888(c0l, c1l, alpha);
|
||||
c01h = Lerp8888(c0h, c1h, alpha);
|
||||
}
|
||||
|
||||
uint32_t c23l, c23h;
|
||||
{
|
||||
uint32_t c2l, c2h, c3l, c3h;
|
||||
Unpack8888(format, texel10, &c2l, &c2h);
|
||||
Unpack8888(format, texel11, &c3l, &c3h);
|
||||
c23l = Lerp8888(c2l, c3l, alpha);
|
||||
c23h = Lerp8888(c2h, c3h, alpha);
|
||||
}
|
||||
|
||||
uint32_t color;
|
||||
{
|
||||
uint32_t cl = Lerp8888(c01l, c23l, beta);
|
||||
uint32_t ch = Lerp8888(c01h, c23h, beta);
|
||||
color = Pack8888(cl, ch);
|
||||
}
|
||||
|
||||
//printf("*** texel00=0x%x, texel01=0x%x, texel10=0x%x, texel11=0x%x, color=0x%x\n", texel00, texel01, texel10, texel11, color);
|
||||
|
||||
return color;
|
||||
}
|
||||
|
||||
inline uint32_t TexFilterPoint(TexFormat format, uint32_t texel) {
|
||||
uint32_t color;
|
||||
{
|
||||
uint32_t cl, ch;
|
||||
Unpack8888(format, texel, &cl, &ch);
|
||||
color = Pack8888(cl, ch);
|
||||
}
|
||||
|
||||
//printf("*** texel=0x%x, color=0x%x\n", texel, color);
|
||||
|
||||
return color;
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "util.h"
|
||||
#include <string.h>
|
||||
|
||||
@@ -7,4 +20,20 @@ const char* fileExtension(const char* filepath) {
|
||||
if (ext == NULL || ext == filepath)
|
||||
return "";
|
||||
return ext + 1;
|
||||
}
|
||||
|
||||
// Allocates `size` bytes whose address is a multiple of `alignment`.
//   size:      number of usable bytes requested.
//   alignment: required alignment; must be a non-zero power of two.
// Returns the aligned pointer, or nullptr when the request cannot be
// satisfied. The result must be released with aligned_free(), never free().
void* aligned_malloc(size_t size, size_t alignment) {
  assert(alignment != 0 && (alignment & (alignment - 1)) == 0); // Power of 2 alignment.
  // reserve margin for alignment and storing of unaligned address
  const size_t margin = (alignment - 1) + sizeof(void*);
  // reject requests whose padded size would wrap around
  if (size > static_cast<size_t>(-1) - margin)
    return nullptr;
  void* unaligned_addr = malloc(size + margin);
  if (unaligned_addr == nullptr)
    return nullptr;
  // skip past the margin, then round down to the alignment boundary; this
  // always leaves at least sizeof(void*) bytes in front of the result
  const uintptr_t aligned = (reinterpret_cast<uintptr_t>(unaligned_addr) + margin)
                          & ~static_cast<uintptr_t>(alignment - 1);
  void** aligned_addr = reinterpret_cast<void**>(aligned);
  // remember the real allocation just below the aligned block
  aligned_addr[-1] = unaligned_addr;
  return aligned_addr;
}
|
||||
|
||||
// Releases a block obtained from aligned_malloc().
// Passing nullptr is a no-op, mirroring free().
void aligned_free(void *ptr) {
  if (ptr == nullptr)
    return;
  // retrieve the stored unaligned address and use it to free the allocation
  void* unaligned_addr = static_cast<void**>(ptr)[-1];
  free(unaligned_addr);
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
@@ -49,4 +62,7 @@ const char* fileExtension(const char* filepath);
|
||||
#define DISABLE_WARNING_UNUSED_PARAMETER
|
||||
#define DISABLE_WARNING_UNREFERENCED_FUNCTION
|
||||
#define DISABLE_WARNING_ANONYMOUS_STRUCT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void *aligned_malloc(size_t size, size_t alignment);
|
||||
void aligned_free(void *ptr);
|
||||
55
sim/common/uuid_gen.h
Normal file
55
sim/common/uuid_gen.h
Normal file
@@ -0,0 +1,55 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Hands out a 32-bit identifier per program counter.
// The low 16 bits hold an id assigned the first time a PC is seen; the upper
// bits hold a counter that starts at 0 and is bumped on every later request
// for the same PC.
// NOTE(review): neither field is range-checked, so ids above 0xffff or more
// than 0xffff repeats spill outside their 16-bit field; confirm callers stay
// within those limits.
class UUIDGenerator {
public:
  UUIDGenerator() : ids_(0) {}
  virtual ~UUIDGenerator() {}

  // Returns the identifier for `PC` and records it for the next request.
  uint32_t get_uuid(uint64_t PC) {
    uint32_t id;
    uint32_t ref;
    const auto entry = uuid_map_.find(PC);
    if (entry == uuid_map_.end()) {
      // first sighting: take the next free id, counter starts at zero
      id  = ids_++;
      ref = 0;
    } else {
      // seen before: unpack the stored value and advance the counter
      const uint32_t packed = entry->second;
      id  = packed & 0xffff;
      ref = (packed >> 16) + 1;
    }
    const uint32_t uuid = (ref << 16) | id;
    uuid_map_[PC] = uuid;
    return uuid;
  }

  // Forgets every PC and restarts id assignment from zero.
  void reset() {
    uuid_map_.clear();
    ids_ = 0;
  }

private:

  std::unordered_map<uint64_t, uint32_t> uuid_map_;  // PC -> last identifier returned
  uint32_t ids_;                                     // next id to hand out
};
|
||||
|
||||
}
|
||||
138
sim/opaesim/Makefile
Normal file
138
sim/opaesim/Makefile
Normal file
@@ -0,0 +1,138 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
AFU_DIR = $(RTL_DIR)/afu/opae
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common -I$(abspath $(DESTDIR))
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator -pthread
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
# Control logic analyzer monitors
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_AFU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_RASTER
|
||||
DBG_SCOPE_FLAGS += -DDBG_SCOPE_MSCHED
|
||||
|
||||
# AFU parameters
|
||||
CONFIGS += -DPLATFORM_PROVIDES_LOCAL_MEMORY
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BANKS,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=2
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=26
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=512
|
||||
endif
|
||||
ifeq (,$(findstring PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH,$(CONFIGS)))
|
||||
CONFIGS += -DPLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH=4
|
||||
endif
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
|
||||
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += $(DBG_SCOPE_FLAGS)
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O3 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SCOPE_JSON = $(DESTDIR)/scope.json
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
PROJECT = libopae-c-sim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(DESTDIR)/vortex.xml:
|
||||
verilator --xml-only -O0 $(VL_FLAGS) $(TOP) --xml-output $(DESTDIR)/vortex.xml
|
||||
|
||||
$(DESTDIR)/scope.json: $(DESTDIR)/vortex.xml
|
||||
$(SCRIPT_DIR)/scope.py $(DESTDIR)/vortex.xml -o $(DESTDIR)/scope.json
|
||||
|
||||
$(DESTDIR)/vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(AFU_DIR)/vortex_afu.vh -o $(DESTDIR)/vortex_afu.h
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(DESTDIR)/vortex_afu.h $(SCOPE_JSON)
|
||||
verilator --build --exe -O3 $(VL_FLAGS) --cc $(TOP) --top-module $(TOP) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/vortex.xml $(DESTDIR)/scope.json $(DESTDIR)/vortex_afu.h $(DESTDIR)/$(PROJECT)
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
@@ -8,10 +21,61 @@
|
||||
#include "fpga.h"
|
||||
#include "opae_sim.h"
|
||||
#include <VX_config.h>
|
||||
#include <util.h>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern fpga_result fpgaGetProperties(fpga_token token, fpga_properties *prop) {
|
||||
__unused (token, prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetObjectType(fpga_properties prop, fpga_objtype objtype) {
|
||||
__unused (prop, objtype);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesSetGUID(fpga_properties prop, fpga_guid guid) {
|
||||
__unused (prop, guid);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyProperties(fpga_properties *prop) {
|
||||
__unused (prop);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaEnumerate(const fpga_properties *filters, uint32_t num_filters, fpga_token *tokens, uint32_t max_tokens, uint32_t *num_matches) {
|
||||
__unused (filters, num_filters, num_filters, tokens, max_tokens);
|
||||
if (num_matches) {
|
||||
*num_matches = 1;
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaDestroyToken(fpga_token *token) {
|
||||
__unused (token);
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaPropertiesGetLocalMemorySize(const fpga_properties *filters, uint64_t* lms) {
|
||||
__unused (filters);
|
||||
if (lms) {
|
||||
#if (XLEN == 64)
|
||||
*lms = 0x200000000; // 8 GB
|
||||
#else
|
||||
*lms = 0x100000000; // 4 GB
|
||||
#endif
|
||||
}
|
||||
return FPGA_OK;
|
||||
}
|
||||
|
||||
extern fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags) {
|
||||
__unused (token);
|
||||
if (NULL == handle || flags != 0)
|
||||
return FPGA_INVALID_PARAM;
|
||||
auto sim = new opae_sim();
|
||||
@@ -83,4 +147,8 @@ extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_
|
||||
|
||||
extern const char *fpgaErrStr(fpga_result e) {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -1,7 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __FPGA_H__
|
||||
#define __FPGA_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -21,28 +34,21 @@ typedef enum {
|
||||
FPGA_RECONF_ERROR /**< Error while reconfiguring FPGA */
|
||||
} fpga_result;
|
||||
|
||||
typedef enum {
|
||||
FPGA_DEVICE = 0,
|
||||
FPGA_ACCELERATOR
|
||||
} fpga_objtype;
|
||||
|
||||
typedef void *fpga_handle;
|
||||
|
||||
typedef void *fpga_token;
|
||||
|
||||
fpga_result fpgaOpen(fpga_token token, fpga_handle *handle, int flags);
|
||||
typedef void *fpga_properties;
|
||||
|
||||
fpga_result fpgaClose(fpga_handle handle);
|
||||
|
||||
fpga_result fpgaPrepareBuffer(fpga_handle handle, uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
fpga_result fpgaReleaseBuffer(fpga_handle handle, uint64_t wsid);
|
||||
|
||||
fpga_result fpgaGetIOAddress(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
fpga_result fpgaWriteMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
const char *fpgaErrStr(fpga_result e);
|
||||
typedef uint8_t fpga_guid[16];
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __FPGA_H__
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "opae_sim.h"
|
||||
|
||||
#include <verilated.h>
|
||||
@@ -25,6 +38,7 @@
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
|
||||
@@ -62,6 +76,8 @@
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
|
||||
#define CPU_GPU_LATENCY 200
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
@@ -70,23 +86,6 @@ double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
// Allocates `size` bytes whose address is a multiple of `alignment`.
// `alignment` is expected to be a power of two, because the address is rounded
// down with a bit mask. The pointer returned by malloc() is stored in the slot
// immediately below the aligned address so that __aligned_free() can recover
// it. Returns nullptr when the request cannot be satisfied.
static void *__aligned_malloc(size_t alignment, size_t size) {
  // reserve margin for alignment and storing of unaligned address
  const size_t margin = (alignment - 1) + sizeof(void*);
  if (size > SIZE_MAX - margin) {
    return nullptr; // size + margin would wrap around
  }
  void *unaligned_addr = malloc(size + margin);
  if (unaligned_addr == nullptr) {
    return nullptr; // report failure instead of writing below a null-derived address
  }
  // Round (base + margin) down to the alignment; the result stays at least
  // sizeof(void*) above the base, leaving room for the bookkeeping slot.
  void **aligned_addr = (void**)((uintptr_t)(((uint8_t*)unaligned_addr) + margin) & ~(alignment-1));
  aligned_addr[-1] = unaligned_addr;
  return aligned_addr;
}
|
||||
|
||||
// Releases a block whose original malloc() pointer is stored in the slot
// immediately below `ptr` (the layout produced by __aligned_malloc()).
// Passing nullptr is a no-op, mirroring free().
static void __aligned_free(void *ptr) {
  if (ptr == nullptr) {
    return; // no allocation, hence no bookkeeping slot to read
  }
  // retrieve the stored unaligned address and use it to free the allocation
  void* unaligned_addr = ((void**)ptr)[-1];
  free(unaligned_addr);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
@@ -158,7 +157,7 @@ public:
|
||||
future_.wait();
|
||||
}
|
||||
for (auto& buffer : host_buffers_) {
|
||||
__aligned_free(buffer.second.data);
|
||||
aligned_free(buffer.second.data);
|
||||
}
|
||||
#ifdef VCD_OUTPUT
|
||||
trace_->close();
|
||||
@@ -176,9 +175,13 @@ public:
|
||||
}
|
||||
|
||||
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags) {
|
||||
auto alloc = __aligned_malloc(CACHE_BLOCK_SIZE, len);
|
||||
auto alloc = aligned_malloc(len, CACHE_BLOCK_SIZE);
|
||||
if (alloc == NULL)
|
||||
return -1;
|
||||
// set uninitialized data to "baadf00d"
|
||||
for (uint32_t i = 0; i < len; ++i) {
|
||||
((uint8_t*)alloc)[i] = (0xbaadf00d >> ((i & 0x3) * 8)) & 0xff;
|
||||
}
|
||||
host_buffer_t buffer;
|
||||
buffer.data = (uint64_t*)alloc;
|
||||
buffer.size = len;
|
||||
@@ -193,7 +196,7 @@ public:
|
||||
void release_buffer(uint64_t wsid) {
|
||||
auto it = host_buffers_.find(wsid);
|
||||
if (it != host_buffers_.end()) {
|
||||
__aligned_free(it->second.data);
|
||||
aligned_free(it->second.data);
|
||||
host_buffers_.erase(it);
|
||||
}
|
||||
}
|
||||
@@ -205,6 +208,11 @@ public:
|
||||
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioRdValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
@@ -217,7 +225,12 @@ public:
|
||||
|
||||
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
|
||||
// simulate CPU-GPU latency
|
||||
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
|
||||
this->tick();
|
||||
|
||||
// simulate mmio request
|
||||
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
|
||||
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
|
||||
@@ -254,7 +267,14 @@ private:
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
device_->reset = 0;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
device_->clk = 0;
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
@@ -289,7 +309,7 @@ private:
|
||||
#endif
|
||||
}
|
||||
|
||||
void eval() {
|
||||
void eval() {
|
||||
device_->eval();
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
@@ -396,10 +416,10 @@ private:
|
||||
|
||||
// process memory requests
|
||||
assert(!device_->avs_read[b] || !device_->avs_write[b]);
|
||||
unsigned byte_addr = device_->avs_address[b] * MEM_BLOCK_SIZE;
|
||||
unsigned byte_addr = (device_->avs_address[b] * MEMORY_BANKS + b) * MEM_BLOCK_SIZE;
|
||||
if (device_->avs_write[b]) {
|
||||
uint64_t byteen = device_->avs_byteenable[b];
|
||||
uint8_t* data = (uint8_t*)device_->avs_writedata[b].data();
|
||||
uint8_t* data = (uint8_t*)(device_->avs_writedata[b].data());
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
(*ram_)[byte_addr + i] = data[i];
|
||||
@@ -419,8 +439,7 @@ private:
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
}
|
||||
|
||||
} else
|
||||
if (device_->avs_read[b]) {
|
||||
auto mem_req = new mem_rd_req_t();
|
||||
mem_req->addr = device_->avs_address[b];
|
||||
@@ -491,7 +510,7 @@ private:
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
RAM *ram_;
|
||||
RAM* ram_;
|
||||
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
@@ -531,4 +550,4 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
|
||||
|
||||
void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
|
||||
impl_->read_mmio64(mmio_num, offset, value);
|
||||
}
|
||||
}
|
||||
43
sim/opaesim/opae_sim.h
Normal file
43
sim/opaesim/opae_sim.h
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {

class RAM;

// Front-end of the OPAE simulator. The implementation lives in a private
// Impl class that is only forward-declared here.
class opae_sim {
public:

  opae_sim();
  virtual ~opae_sim();

  // The object holds a raw Impl pointer, so an implicit copy would leave two
  // objects sharing one Impl. Copying is disabled to rule that out.
  // NOTE(review): presumably the destructor deletes impl_ - confirm in the .cpp.
  opae_sim(const opae_sim&) = delete;
  opae_sim& operator=(const opae_sim&) = delete;

  // Prepares a host buffer of `len` bytes; `buf_addr` and `wsid` are outputs.
  // Returns an int status code.
  int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);

  // Releases the buffer identified by `wsid`.
  void release_buffer(uint64_t wsid);

  // Writes the I/O address associated with `wsid` to `ioaddr`.
  void get_io_address(uint64_t wsid, uint64_t *ioaddr);

  // Performs a 64-bit MMIO write of `value` at `offset`.
  void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);

  // Performs a 64-bit MMIO read at `offset`; the result is stored in `value`.
  void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);

private:

  class Impl;
  Impl* impl_;
};

}
|
||||
8
sim/opaesim/verilator.vlt
Normal file
8
sim/opaesim/verilator.vlt
Normal file
@@ -0,0 +1,8 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
|
||||
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
|
||||
lint_off -file "*/fpnew/src/*"
|
||||
|
||||
lint_off -file "*/afu/opae/ccip/ccip_if_pkg.sv"
|
||||
lint_off -file "*/afu/opae/local_mem_cfg_pkg.sv"
|
||||
@@ -1,16 +1,24 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
`include "vortex_afu.vh"
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
/* verilator lint_off IMPORTSTAR */
|
||||
import ccip_if_pkg::*;
|
||||
import local_mem_cfg_pkg::*;
|
||||
/* verilator lint_on IMPORTSTAR */
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module vortex_afu_shim (
|
||||
module vortex_afu_shim import local_mem_cfg_pkg::*; import ccip_if_pkg::*; (
|
||||
// global signals
|
||||
input clk,
|
||||
input reset,
|
||||
@@ -167,4 +175,4 @@ assign af2cp_sTxPort_c2_hdr_tid = af2cp_sTxPort.c2.hdr.tid;
|
||||
assign af2cp_sTxPort_c2_mmioRdValid = af2cp_sTxPort.c2.mmioRdValid;
|
||||
assign af2cp_sTxPort_c2_data = af2cp_sTxPort.c2.data;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
@@ -1,3 +1,4 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
@@ -8,6 +9,7 @@ CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
LDFLAGS += ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
@@ -24,12 +26,20 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_GBAR
|
||||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu
|
||||
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
||||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
@@ -42,14 +52,18 @@ else
|
||||
TOP = Vortex
|
||||
endif
|
||||
|
||||
VL_FLAGS = --exe --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-EOFNEWLINE
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
VL_FLAGS += -DSIMULATION
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
@@ -59,8 +73,8 @@ VL_FLAGS += -j $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
@@ -72,20 +86,12 @@ ifdef PERF
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = rtlsim
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$@
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS) -DSTARTUP_ADDR=0x80000000' -LDFLAGS '$(LDFLAGS)' -o ../$@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -LDFLAGS '-shared $(LDFLAGS)' -o ../$@
|
||||
|
||||
@@ -1,11 +1,24 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <unistd.h>
|
||||
#include <unistd.h>
|
||||
#include <util.h>
|
||||
#include <mem.h>
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include "processor.h"
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
@@ -13,11 +26,11 @@
|
||||
using namespace vortex;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Usage: [-r] [-h: help] programs.." << std::endl;
|
||||
std::cout << "Usage: [-r: riscv-test] [-h: help] <program>" << std::endl;
|
||||
}
|
||||
|
||||
bool riscv_test = false;
|
||||
std::vector<const char*> programs;
|
||||
const char* program = nullptr;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
@@ -35,56 +48,68 @@ static void parse_args(int argc, char **argv) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = optind; i < argc; ++i) {
|
||||
programs.push_back(argv[i]);
|
||||
if (optind < argc) {
|
||||
program = argv[optind];
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
} else {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
int exitcode = 0;
|
||||
bool failed = false;
|
||||
|
||||
parse_args(argc, argv);
|
||||
parse_args(argc, argv);
|
||||
|
||||
// create memory module
|
||||
vortex::RAM ram(RAM_PAGE_SIZE);
|
||||
|
||||
// create processor
|
||||
vortex::Processor processor;
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
for (auto program : programs) {
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
// setup base DCRs
|
||||
const uint64_t startup_addr(STARTUP_ADDR);
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
|
||||
#if (XLEN == 64)
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32);
|
||||
#endif
|
||||
processor.write_dcr(VX_DCR_BASE_MPM_CLASS, 0);
|
||||
|
||||
// load program
|
||||
{
|
||||
std::string program_ext(fileExtension(program));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(program, STARTUP_ADDR);
|
||||
ram.loadBinImage(program, startup_addr);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(program);
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
exitcode = processor.run();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed" << std::endl;
|
||||
} else {
|
||||
std::cout << "Failed: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
failed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (failed)
|
||||
break;
|
||||
}
|
||||
|
||||
return failed ? exitcode : 0;
|
||||
// run simulation
|
||||
exitcode = processor.run();
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed" << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed" << std::endl;
|
||||
exitcode = 1;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "processor.h"
|
||||
|
||||
#include <verilated.h>
|
||||
@@ -56,6 +69,14 @@
|
||||
#define VERILATOR_RESET_VALUE 2
|
||||
#endif
|
||||
|
||||
// Word is the unsigned integer type matching the configured XLEN;
// any XLEN other than 32 or 64 is rejected at compile time.
#if (XLEN == 32)
using Word = uint32_t;
#elif (XLEN == 64)
using Word = uint64_t;
#else
#error unsupported XLEN
#endif
|
||||
|
||||
#define VL_WDATA_GETW(lwp, i, n, w) \
|
||||
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
|
||||
|
||||
@@ -71,7 +92,7 @@ double sc_time_stamp() {
|
||||
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
@@ -126,6 +147,9 @@ public:
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
@@ -165,27 +189,46 @@ public:
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
#endif
|
||||
|
||||
// reset device
|
||||
this->reset();
|
||||
// start execution
|
||||
running_ = true;
|
||||
device_->reset = 0;
|
||||
|
||||
// execute program
|
||||
// wait on device to go busy
|
||||
while (!device_->busy) {
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// wait on device to go idle
|
||||
while (device_->busy) {
|
||||
if (get_ebreak()) {
|
||||
exitcode = get_last_wb_value(3);
|
||||
exitcode = (int)get_last_wb_value(3);
|
||||
break;
|
||||
}
|
||||
this->tick();
|
||||
}
|
||||
|
||||
// reset device
|
||||
this->reset();
|
||||
|
||||
// wait 5 cycles to flush the pipeline
|
||||
this->wait(5);
|
||||
this->cout_flush();
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value) {
|
||||
device_->dcr_wr_valid = 1;
|
||||
device_->dcr_wr_addr = addr;
|
||||
device_->dcr_wr_data = value;
|
||||
while (device_->dcr_wr_valid) {
|
||||
this->tick();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void reset() {
|
||||
void reset() {
|
||||
running_ = false;
|
||||
|
||||
print_bufs_.clear();
|
||||
|
||||
pending_mem_reqs_.clear();
|
||||
@@ -199,6 +242,8 @@ private:
|
||||
this->reset_avs_bus();
|
||||
#endif
|
||||
|
||||
this->reset_dcr_bus();
|
||||
|
||||
device_->reset = 1;
|
||||
|
||||
for (int i = 0; i < RESET_DELAY; ++i) {
|
||||
@@ -206,14 +251,7 @@ private:
|
||||
this->eval();
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
}
|
||||
|
||||
device_->reset = 0;
|
||||
|
||||
// Turn on assertion after reset
|
||||
Verilated::assertOn(true);
|
||||
|
||||
this->cout_flush();
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
@@ -226,6 +264,7 @@ private:
|
||||
#else
|
||||
this->eval_avs_bus(0);
|
||||
#endif
|
||||
this->eval_dcr_bus(0);
|
||||
|
||||
device_->clk = 1;
|
||||
this->eval();
|
||||
@@ -235,6 +274,7 @@ private:
|
||||
#else
|
||||
this->eval_avs_bus(1);
|
||||
#endif
|
||||
this->eval_dcr_bus(1);
|
||||
|
||||
if (MEM_CYCLE_RATIO > 0) {
|
||||
auto cycle = timestamp / 2;
|
||||
@@ -260,6 +300,8 @@ private:
|
||||
#ifdef VCD_OUTPUT
|
||||
if (sim_trace_enabled()) {
|
||||
trace_->dump(timestamp);
|
||||
} else {
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
++timestamp;
|
||||
@@ -268,30 +310,30 @@ private:
|
||||
#ifdef AXI_BUS
|
||||
|
||||
void reset_axi_bus() {
|
||||
device_->m_axi_wready = 0;
|
||||
device_->m_axi_awready = 0;
|
||||
device_->m_axi_arready = 0;
|
||||
device_->m_axi_rvalid = 0;
|
||||
device_->m_axi_bvalid = 0;
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
|
||||
void eval_axi_bus(bool clk) {
|
||||
if (!clk) {
|
||||
mem_rd_rsp_ready_ = device_->m_axi_rready;
|
||||
mem_wr_rsp_ready_ = device_->m_axi_bready;
|
||||
mem_rd_rsp_ready_ = device_->m_axi_rready[0];
|
||||
mem_wr_rsp_ready_ = device_->m_axi_bready[0];
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_ == nullptr) {
|
||||
device_->m_axi_wready = 0;
|
||||
device_->m_axi_awready = 0;
|
||||
device_->m_axi_arready = 0;
|
||||
device_->m_axi_wready[0] = 0;
|
||||
device_->m_axi_awready[0] = 0;
|
||||
device_->m_axi_arready[0] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// process memory responses
|
||||
if (mem_rd_rsp_active_
|
||||
&& device_->m_axi_rvalid && mem_rd_rsp_ready_) {
|
||||
&& device_->m_axi_rvalid[0] && mem_rd_rsp_ready_) {
|
||||
mem_rd_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_rd_rsp_active_) {
|
||||
@@ -299,30 +341,30 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& !(*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Rd Rsp: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
device_->m_axi_rvalid = 1;
|
||||
device_->m_axi_rid = mem_req->tag;
|
||||
device_->m_axi_rresp = 0;
|
||||
device_->m_axi_rlast = 1;
|
||||
memcpy((uint8_t*)device_->m_axi_rdata, mem_req->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->m_axi_rvalid[0] = 1;
|
||||
device_->m_axi_rid[0] = mem_rsp->tag;
|
||||
device_->m_axi_rresp[0] = 0;
|
||||
device_->m_axi_rlast[0] = 1;
|
||||
memcpy(device_->m_axi_rdata[0].data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_rvalid = 0;
|
||||
device_->m_axi_rvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// send memory write response
|
||||
if (mem_wr_rsp_active_
|
||||
&& device_->m_axi_bvalid && mem_wr_rsp_ready_) {
|
||||
&& device_->m_axi_bvalid[0] && mem_wr_rsp_ready_) {
|
||||
mem_wr_rsp_active_ = false;
|
||||
}
|
||||
if (!mem_wr_rsp_active_) {
|
||||
@@ -330,34 +372,34 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready
|
||||
&& (*pending_mem_reqs_.begin())->write) {
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Wr Rsp: bank=%d, addr=%0lx\n", timestamp, last_mem_rsp_bank_, mem_rsp->addr);
|
||||
*/
|
||||
device_->m_axi_bvalid = 1;
|
||||
device_->m_axi_bid = mem_req->tag;
|
||||
device_->m_axi_bresp = 0;
|
||||
device_->m_axi_bvalid[0] = 1;
|
||||
device_->m_axi_bid[0] = mem_rsp->tag;
|
||||
device_->m_axi_bresp[0] = 0;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_wr_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->m_axi_bvalid = 0;
|
||||
device_->m_axi_bvalid[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// select the memory bank
|
||||
uint32_t req_addr = device_->m_axi_wvalid ? device_->m_axi_awaddr : device_->m_axi_araddr;
|
||||
uint32_t req_addr = device_->m_axi_wvalid[0] ? device_->m_axi_awaddr[0] : device_->m_axi_araddr[0];
|
||||
|
||||
// process memory requests
|
||||
if (device_->m_axi_wvalid || device_->m_axi_arvalid) {
|
||||
if (device_->m_axi_wvalid) {
|
||||
uint64_t byteen = device_->m_axi_wstrb;
|
||||
unsigned base_addr = device_->m_axi_awaddr;
|
||||
uint8_t* data = (uint8_t*)(device_->m_axi_wdata);
|
||||
if ((device_->m_axi_wvalid[0] || device_->m_axi_arvalid[0]) && running_) {
|
||||
if (device_->m_axi_wvalid[0]) {
|
||||
uint64_t byteen = device_->m_axi_wstrb[0];
|
||||
uint64_t base_addr = device_->m_axi_awaddr[0];
|
||||
uint8_t* data = (uint8_t*)device_->m_axi_wdata[0].data();
|
||||
|
||||
// check console output
|
||||
if (base_addr >= IO_COUT_ADDR
|
||||
&& base_addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
|
||||
if (base_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& base_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
@@ -384,15 +426,15 @@ private:
|
||||
}
|
||||
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_awid;
|
||||
mem_req->addr = device_->m_axi_awaddr;
|
||||
mem_req->tag = device_->m_axi_awid[0];
|
||||
mem_req->addr = device_->m_axi_awaddr[0];
|
||||
mem_req->write = true;
|
||||
mem_req->ready = true;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_awaddr,
|
||||
device_->m_axi_awaddr[0],
|
||||
ramulator::Request::Type::WRITE,
|
||||
0
|
||||
);
|
||||
@@ -401,18 +443,18 @@ private:
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
mem_req->tag = device_->m_axi_arid;
|
||||
mem_req->addr = device_->m_axi_araddr;
|
||||
ram_->read(mem_req->block.data(), device_->m_axi_araddr, MEM_BLOCK_SIZE);
|
||||
mem_req->tag = device_->m_axi_arid[0];
|
||||
mem_req->addr = device_->m_axi_araddr[0];
|
||||
ram_->read(mem_req->block.data(), device_->m_axi_araddr[0], MEM_BLOCK_SIZE);
|
||||
mem_req->write = false;
|
||||
mem_req->ready = false;
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
device_->m_axi_araddr,
|
||||
device_->m_axi_araddr[0],
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
@@ -421,9 +463,9 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
device_->m_axi_wready = 1;
|
||||
device_->m_axi_awready = 1;
|
||||
device_->m_axi_arready = 1;
|
||||
device_->m_axi_wready[0] = running_;
|
||||
device_->m_axi_awready[0] = running_;
|
||||
device_->m_axi_arready[0] = running_;
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -454,35 +496,35 @@ private:
|
||||
&& (*pending_mem_reqs_.begin())->ready) {
|
||||
device_->mem_rsp_valid = 1;
|
||||
auto mem_rsp_it = pending_mem_reqs_.begin();
|
||||
auto mem_req = *mem_rsp_it;
|
||||
auto mem_rsp = *mem_rsp_it;
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_req->addr);
|
||||
printf("%0ld: [sim] MEM Rd: bank=%d, tag=%0lx, addr=%0lx, data=", timestamp, last_mem_rsp_bank_, mem_rsp->tag, mem_rsp->addr);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", mem_req->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
printf("%02x", mem_rsp->block[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
memcpy(device_->mem_rsp_data.data(), mem_req->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_req->tag;
|
||||
memcpy(device_->mem_rsp_data.data(), mem_rsp->block.data(), MEM_BLOCK_SIZE);
|
||||
device_->mem_rsp_tag = mem_rsp->tag;
|
||||
pending_mem_reqs_.erase(mem_rsp_it);
|
||||
mem_rd_rsp_active_ = true;
|
||||
delete mem_req;
|
||||
delete mem_rsp;
|
||||
} else {
|
||||
device_->mem_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// process memory requests
|
||||
if (device_->mem_req_valid) {
|
||||
uint32_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_valid && running_) {
|
||||
uint64_t byte_addr = (device_->mem_req_addr * MEM_BLOCK_SIZE);
|
||||
if (device_->mem_req_rw) {
|
||||
// process writes
|
||||
uint64_t byteen = device_->mem_req_byteen;
|
||||
uint8_t* data = (uint8_t*)device_->mem_req_data.data();
|
||||
uint8_t* data = (uint8_t*)(device_->mem_req_data.data());
|
||||
|
||||
// check console output
|
||||
if (byte_addr >= IO_COUT_ADDR
|
||||
&& byte_addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
|
||||
if (byte_addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
for (int i = 0; i < IO_COUT_SIZE; i++) {
|
||||
if ((byteen >> i) & 0x1) {
|
||||
auto& ss_buf = print_bufs_[i];
|
||||
@@ -496,7 +538,7 @@ private:
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
printf("%0ld: [sim] MEM Wr: addr=%0x, byteen=%0lx, data=", timestamp, byte_addr, byteen);
|
||||
printf("%0ld: [sim] MEM Wr: tag=%0lx, addr=%0x, byteen=%0lx, data=", timestamp, device_->mem_req_tag, byte_addr, byteen);
|
||||
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
|
||||
printf("%02x", data[(MEM_BLOCK_SIZE-1)-i]);
|
||||
}
|
||||
@@ -515,7 +557,7 @@ private:
|
||||
0
|
||||
);
|
||||
dram_queue_.push(dram_req);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// process reads
|
||||
auto mem_req = new mem_req_t();
|
||||
@@ -526,11 +568,13 @@ private:
|
||||
ram_->read(mem_req->block.data(), byte_addr, MEM_BLOCK_SIZE);
|
||||
pending_mem_reqs_.emplace_back(mem_req);
|
||||
|
||||
//printf("%0ld: [sim] MEM Rd Req: addr=%0x, tag=%0lx\n", timestamp, byte_addr, device_->mem_req_tag);
|
||||
|
||||
// send dram request
|
||||
ramulator::Request dram_req(
|
||||
byte_addr,
|
||||
ramulator::Request::Type::READ,
|
||||
std::bind([](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
std::bind([&](ramulator::Request& dram_req, mem_req_t* mem_req) {
|
||||
mem_req->ready = true;
|
||||
}, placeholders::_1, mem_req),
|
||||
0
|
||||
@@ -539,11 +583,24 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
device_->mem_req_ready = 1;
|
||||
device_->mem_req_ready = running_;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// Puts the DCR bus into its idle state by deasserting the write-valid signal.
void reset_dcr_bus() {
  device_->dcr_wr_valid = 0;
}
|
||||
|
||||
void eval_dcr_bus(bool clk) {
|
||||
if (!clk) {
|
||||
return;
|
||||
}
|
||||
if (device_->dcr_wr_valid) {
|
||||
device_->dcr_wr_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->tick();
|
||||
@@ -552,17 +609,17 @@ private:
|
||||
|
||||
bool get_ebreak() const {
|
||||
#ifdef AXI_BUS
|
||||
return (bool)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
return (bool)device_->Vortex_axi->vortex->sim_ebreak;
|
||||
#else
|
||||
return (bool)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->execute->ebreak;
|
||||
return (bool)device_->Vortex->sim_ebreak;
|
||||
#endif
|
||||
}
|
||||
|
||||
int get_last_wb_value(int reg) const {
|
||||
uint64_t get_last_wb_value(int reg) const {
|
||||
#ifdef AXI_BUS
|
||||
return (int)device_->Vortex_axi->vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
return ((Word*)device_->Vortex_axi->vortex->sim_wb_value.data())[reg];
|
||||
#else
|
||||
return (int)device_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
return ((Word*)device_->Vortex->sim_wb_value.data())[reg];
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -600,6 +657,8 @@ private:
|
||||
ramulator::Gem5Wrapper* dram_;
|
||||
|
||||
std::queue<ramulator::Request> dram_queue_;
|
||||
|
||||
bool running_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -618,4 +677,8 @@ void Processor::attach_ram(RAM* mem) {
|
||||
|
||||
int Processor::run() {
|
||||
return impl_->run();
|
||||
}
|
||||
|
||||
void Processor::write_dcr(uint32_t addr, uint32_t value) {
|
||||
return impl_->write_dcr(addr, value);
|
||||
}
|
||||
@@ -1,5 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
@@ -14,6 +29,8 @@ public:
|
||||
|
||||
int run();
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
private:
|
||||
|
||||
class Impl;
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule BLKANDNBLK -file "*/fpnew/src/*"
|
||||
lint_off -rule UNOPTFLAT -file "*/fpnew/src/*"
|
||||
lint_off -file "*/fpnew/src/*"
|
||||
|
||||
@@ -1,45 +1,36 @@
|
||||
XLEN ?= 32
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../hw/rtl
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I. -I../common -I../../hw
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/cocogfx -lcocogfx
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += args.cpp cache.cpp memsim.cpp warp.cpp core.cpp decode.cpp execute.cpp exeunit.cpp tex_unit.cpp processor.cpp
|
||||
|
||||
OBJS := $(patsubst %.cpp, obj_dir/%.o, $(notdir $(SRCS)))
|
||||
VPATH := $(sort $(dir $(SRCS)))
|
||||
|
||||
#$(info OBJS is $(OBJS))
|
||||
#$(info VPATH is $(VPATH))
|
||||
SRCS += processor.cpp cluster.cpp core.cpp warp.cpp decode.cpp execute.cpp exe_unit.cpp cache_sim.cpp mem_sim.cpp shared_mem.cpp dcrs.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG)
|
||||
#CXXFLAGS += -g -O0 -DDEBUG_LEVEL=$(DEBUG) -fsanitize=address -fno-omit-frame-pointer
|
||||
else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# XLEN parameterization
|
||||
ifdef XLEN
|
||||
CXXFLAGS += -DXLEN=$(XLEN)
|
||||
endif
|
||||
|
||||
PROJECT = simx
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
$(CXX) $(CXXFLAGS) -DSTARTUP_ADDR=0x80000000 $^ $(LDFLAGS) -o $@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
|
||||
@@ -48,4 +39,4 @@ $(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
rm -rf $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
|
||||
87
sim/simx/arch.h
Normal file
87
sim/simx/arch.h
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Arch {
|
||||
private:
|
||||
uint16_t num_threads_;
|
||||
uint16_t num_warps_;
|
||||
uint16_t num_cores_;
|
||||
uint16_t num_clusters_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_regs_;
|
||||
uint16_t num_csrs_;
|
||||
uint16_t num_barriers_;
|
||||
uint16_t ipdom_size_;
|
||||
|
||||
public:
|
||||
Arch(uint16_t num_threads, uint16_t num_warps, uint16_t num_cores, uint16_t num_clusters)
|
||||
: num_threads_(num_threads)
|
||||
, num_warps_(num_warps)
|
||||
, num_cores_(num_cores)
|
||||
, num_clusters_(num_clusters)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
, ipdom_size_((num_threads-1) * 2)
|
||||
{}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
uint16_t num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
uint16_t num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
uint16_t num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
||||
uint16_t ipdom_size() const {
|
||||
return ipdom_size_;
|
||||
}
|
||||
|
||||
uint16_t num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
uint16_t num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
uint16_t num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
uint16_t num_clusters() const {
|
||||
return num_clusters_;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
private:
|
||||
uint16_t num_cores_;
|
||||
uint16_t num_warps_;
|
||||
uint16_t num_threads_;
|
||||
uint16_t wsize_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_regs_;
|
||||
uint16_t num_csrs_;
|
||||
uint16_t num_barriers_;
|
||||
|
||||
public:
|
||||
ArchDef(uint16_t num_cores,
|
||||
uint16_t num_warps,
|
||||
uint16_t num_threads)
|
||||
: num_cores_(num_cores)
|
||||
, num_warps_(num_warps)
|
||||
, num_threads_(num_threads)
|
||||
, wsize_(4)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
{}
|
||||
|
||||
uint16_t wsize() const {
|
||||
return wsize_;
|
||||
}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
uint16_t num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
uint16_t num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
uint16_t num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
||||
uint16_t num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
uint16_t num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
uint16_t num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "args.h"
|
||||
|
||||
using namespace vortex;
|
||||
using std::string;
|
||||
|
||||
std::string CommandLineArg::helpString_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::longArgs_;
|
||||
std::unordered_map<string, CommandLineArg *> CommandLineArg::shortArgs_;
|
||||
|
||||
// Registers this argument under both its short name `s` and long name `l`,
// and appends `helpText` to the shared help string.
CommandLineArg::CommandLineArg(string s, string l, const char *helpText) {
  helpString_ += helpText;
  longArgs_[l] = this;
  shortArgs_[s] = this;
}
|
||||
|
||||
// Registers this argument under its long name `l` only, and appends
// `helpText` to the shared help string.
CommandLineArg::CommandLineArg(string l, const char *helpText) {
  helpString_ += helpText;
  longArgs_[l] = this;
}
|
||||
|
||||
void CommandLineArg::readArgs(int argc, char **argv) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
std::unordered_map<string, CommandLineArg *>::iterator
|
||||
s = shortArgs_.find(std::string(argv[i])),
|
||||
l = longArgs_.find(std::string(argv[i]));
|
||||
|
||||
if (s != shortArgs_.end()) {
|
||||
i += s->second->read(argc - i, &argv[i]);
|
||||
} else if (l != longArgs_.end()) {
|
||||
i += l->second->read(argc - i, &argv[i]);
|
||||
} else {
|
||||
throw BadArg(string(argv[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empties both name registries and the accumulated help text.
void CommandLineArg::clearArgs() {
  shortArgs_.clear();
  longArgs_.clear();
  helpString_ = "";
}
|
||||
|
||||
// Writes the accumulated help text of all registered arguments to `os`.
void CommandLineArg::showHelp(std::ostream &os) {
  os << helpString_;
}
|
||||
@@ -1,64 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Thrown by the argument parser; `arg` holds the offending token.
struct BadArg {
  std::string arg;

  BadArg(std::string s) : arg(s) {}
};
|
||||
|
||||
// Base class for a self-registering command-line option.
// Constructing an instance records it in class-wide name tables, which
// readArgs() later uses to dispatch tokens.
class CommandLineArg {
public:
  // Register under a short and a long name.
  CommandLineArg(std::string s, std::string l, const char *helpText);
  // Register under a long name only.
  CommandLineArg(std::string l, const char *helpText);

  // Polymorphic base: make destruction through a base pointer well-defined.
  virtual ~CommandLineArg() = default;

  // Handles the option found at argv[0]; argc counts the tokens remaining
  // from that position. Returns the number of additional tokens consumed.
  virtual int read(int argc, char** argv) = 0;

  static void readArgs(int argc, char **argv);
  static void clearArgs();
  static void showHelp(std::ostream &os);

private:
  static std::string helpString_;
  static std::unordered_map<std::string, CommandLineArg *> longArgs_;
  static std::unordered_map<std::string, CommandLineArg *> shortArgs_;
};
|
||||
|
||||
template <typename T> class CommandLineArgSetter : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgSetter(std::string s, std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) {}
|
||||
|
||||
CommandLineArgSetter(std::string l, const char *ht, T &x) :
|
||||
CommandLineArg(l, ht), arg_(x) {}
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused (argc);
|
||||
std::istringstream iss(argv[1]);
|
||||
iss >> arg_;
|
||||
return 1;
|
||||
}
|
||||
private:
|
||||
T &arg_;
|
||||
};
|
||||
|
||||
class CommandLineArgFlag : public CommandLineArg {
|
||||
public:
|
||||
CommandLineArgFlag(std::string s, std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(s, l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
CommandLineArgFlag(std::string l, const char *ht, bool &x) :
|
||||
CommandLineArg(l, ht), arg_(x) { arg_ = false; }
|
||||
|
||||
int read(int argc, char **argv) {
|
||||
__unused (argc, argv);
|
||||
arg_ = true;
|
||||
return 0;
|
||||
}
|
||||
private:
|
||||
bool &arg_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,637 +0,0 @@
|
||||
#include "cache.h"
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include <util.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct params_t {
|
||||
uint32_t sets_per_bank;
|
||||
uint32_t blocks_per_set;
|
||||
uint32_t words_per_block;
|
||||
uint32_t log2_num_inputs;
|
||||
|
||||
uint32_t word_select_addr_start;
|
||||
uint32_t word_select_addr_end;
|
||||
|
||||
uint32_t bank_select_addr_start;
|
||||
uint32_t bank_select_addr_end;
|
||||
|
||||
uint32_t set_select_addr_start;
|
||||
uint32_t set_select_addr_end;
|
||||
|
||||
uint32_t tag_select_addr_start;
|
||||
uint32_t tag_select_addr_end;
|
||||
|
||||
params_t(const Cache::Config& config) {
|
||||
uint32_t bank_bits = log2ceil(config.num_banks);
|
||||
uint32_t offset_bits = config.B - config.W;
|
||||
uint32_t log2_bank_size = config.C - bank_bits;
|
||||
uint32_t index_bits = log2_bank_size - (config.B << config.A);
|
||||
assert(log2_bank_size >= config.B);
|
||||
|
||||
this->log2_num_inputs = log2ceil(config.num_inputs);
|
||||
|
||||
this->words_per_block = 1 << offset_bits;
|
||||
this->blocks_per_set = 1 << config.A;
|
||||
this->sets_per_bank = 1 << index_bits;
|
||||
|
||||
assert(config.ports_per_bank <= this->words_per_block);
|
||||
|
||||
// Word select
|
||||
this->word_select_addr_start = config.W;
|
||||
this->word_select_addr_end = (this->word_select_addr_start+offset_bits-1);
|
||||
|
||||
// Bank select
|
||||
this->bank_select_addr_start = (1+this->word_select_addr_end);
|
||||
this->bank_select_addr_end = (this->bank_select_addr_start+bank_bits-1);
|
||||
|
||||
// Set select
|
||||
this->set_select_addr_start = (1+this->bank_select_addr_end);
|
||||
this->set_select_addr_end = (this->set_select_addr_start+index_bits-1);
|
||||
|
||||
// Tag select
|
||||
this->tag_select_addr_start = (1+this->set_select_addr_end);
|
||||
this->tag_select_addr_end = (config.addr_width-1);
|
||||
}
|
||||
|
||||
uint32_t addr_bank_id(uint64_t word_addr) const {
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, bank_select_addr_start, bank_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t addr_set_id(uint64_t word_addr) const {
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, set_select_addr_start, set_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t addr_tag(uint64_t word_addr) const {
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
return bit_getw(word_addr, tag_select_addr_start, tag_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mem_addr(uint32_t bank_id, uint32_t set_id, uint64_t tag) const {
|
||||
uint64_t addr(0);
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
addr = bit_setw(addr, bank_select_addr_start, bank_select_addr_end, bank_id);
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
addr = bit_setw(addr, set_select_addr_start, set_select_addr_end, set_id);
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
addr = bit_setw(addr, tag_select_addr_start, tag_select_addr_end, tag);
|
||||
return addr;
|
||||
}
|
||||
};
|
||||
|
||||
// State of one cache block (tag-array entry only; no data is stored).
struct block_t {
  bool     valid;
  bool     dirty;
  uint64_t tag;
  uint32_t lru_ctr;
};

// One cache set: a fixed number of blocks (ways).
struct set_t {
  std::vector<block_t> blocks;

  // Not explicit on purpose: callers build vectors of sets from a bare
  // block count.
  set_t(uint32_t size) : blocks(size) {}

  // Invalidates every block. The dirty flag and LRU counter are reset too,
  // so stale state from before the clear cannot leak into a block that is
  // later refilled (which would otherwise cause a spurious write-back).
  void clear() {
    for (auto& block : blocks) {
      block.valid   = false;
      block.dirty   = false;
      block.lru_ctr = 0;
    }
  }
};
|
||||
|
||||
// Per-port slot of a bank request: which input issued it and with what tag.
// Kept as a plain aggregate so it can be brace-initialized.
struct bank_req_info_t {
  bool     valid;
  uint32_t req_id;
  uint64_t req_tag;
};

// A request travelling through one bank's pipeline. `infos` has one slot per
// bank port; only slots with valid == true carry a requester.
struct bank_req_t {
  bool     valid;
  bool     write;
  bool     mshr_replay;
  uint64_t tag;
  uint32_t set_id;
  uint32_t core_id;
  uint64_t uuid;
  std::vector<bank_req_info_t> infos;

  // `size` is the number of port slots. Not explicit on purpose: callers
  // build vectors of requests from a bare slot count.
  bank_req_t(uint32_t size)
    : valid(false)
    , write(false)
    , mshr_replay(false)
    , tag(0)
    , set_id(0)
    , core_id(0)
    , uuid(0)
    , infos(size)
  {}
};

// A bank request parked in the MSHR, plus the block chosen to receive the fill.
struct mshr_entry_t : public bank_req_t {
  uint32_t block_id;

  mshr_entry_t(uint32_t size = 0)
    : bank_req_t(size)
    , block_id(0)
  {}
};

// Fixed-capacity table of outstanding misses.
// `size_` counts the entries whose valid flag is set.
class MSHR {
private:
  std::vector<mshr_entry_t> entries_;
  uint32_t size_;

public:
  MSHR(uint32_t size)
    : entries_(size)
    , size_(0)
  {}

  bool empty() const {
    return (0 == size_);
  }

  bool full() const {
    return (size_ == entries_.size());
  }

  // Index of the first valid entry with the same set and tag, or -1.
  int lookup(const bank_req_t& bank_req) {
    for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
      auto& entry = entries_.at(i);
      if (entry.valid
       && entry.set_id == bank_req.set_id
       && entry.tag == bank_req.tag) {
        return i;
      }
    }
    return -1;
  }

  // Stores `bank_req` in the first free slot and returns its index,
  // or -1 when every slot is in use.
  int allocate(const bank_req_t& bank_req, uint32_t block_id) {
    for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
      auto& entry = entries_.at(i);
      if (!entry.valid) {
        // copy only the bank_req_t part of the entry
        static_cast<bank_req_t&>(entry) = bank_req;
        entry.valid = true;
        entry.mshr_replay = false;
        entry.block_id = block_id;
        ++size_;
        return i;
      }
    }
    return -1;
  }

  // Marks entry `id` and every other valid entry with the same set and tag
  // for replay; returns entry `id`.
  mshr_entry_t& replay(uint32_t id) {
    auto& root_entry = entries_.at(id);
    assert(root_entry.valid);
    // mark all related mshr entries for replay
    for (auto& entry : entries_) {
      if (entry.valid
       && entry.set_id == root_entry.set_id
       && entry.tag == root_entry.tag) {
        entry.mshr_replay = true;
      }
    }
    return root_entry;
  }

  // Removes the first entry marked for replay and copies its request part
  // into `*out`. Returns false (leaving `*out` untouched) if there is none.
  bool pop(bank_req_t* out) {
    for (auto& entry : entries_) {
      if (entry.valid && entry.mshr_replay) {
        *out = entry;
        entry.valid = false;
        --size_;
        return true;
      }
    }
    return false;
  }

  // Drops every entry. All entries must be invalidated, not only those
  // marked for replay: size_ is reset to 0, so any entry left valid would
  // still match lookup() and occupy a slot while the table reports empty.
  void clear() {
    for (auto& entry : entries_) {
      entry.valid = false;
      entry.mshr_replay = false;
    }
    size_ = 0;
  }
};
|
||||
|
||||
struct bank_t {
|
||||
std::vector<set_t> sets;
|
||||
MSHR mshr;
|
||||
|
||||
bank_t(const Cache::Config& config,
|
||||
const params_t& params)
|
||||
: sets(params.sets_per_bank, params.blocks_per_set)
|
||||
, mshr(config.mshr_size)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
mshr.clear();
|
||||
for (auto& set : sets) {
|
||||
set.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Timing model of a banked cache (tags only, no data).
// Requests arrive on the owner's CoreReqPorts; misses and write-throughs go
// out through per-bank memory ports, which are arbitrated (when there is
// more than one bank) and then merged with the bypass path for non-cacheable
// requests before reaching the owner's MemReqPort.
class Cache::Impl {
private:
  Cache* const simobject_;
  Config config_;
  params_t params_;
  std::vector<bank_t> banks_;
  Switch<MemReq, MemRsp>::Ptr mem_switch_;
  Switch<MemReq, MemRsp>::Ptr bypass_switch_;
  std::vector<SimPort<MemReq>> mem_req_ports_;
  std::vector<SimPort<MemRsp>> mem_rsp_ports_;
  uint32_t flush_cycles_;
  PerfStats perf_stats_;
  uint64_t pending_read_reqs_;
  uint64_t pending_write_reqs_;
  uint64_t pending_fill_reqs_;

public:
  Impl(Cache* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
    , params_(config)
    , banks_(config.num_banks, {config, params_})
    , mem_req_ports_(config.num_banks, simobject)
    , mem_rsp_ports_(config.num_banks, simobject)
    // counters start at zero so tick() is well-defined even before reset()
    , pending_read_reqs_(0)
    , pending_write_reqs_(0)
    , pending_fill_reqs_(0)
  {
    // input 0 of the bypass switch carries cache traffic, input 1 carries
    // non-cacheable traffic
    bypass_switch_ = Switch<MemReq, MemRsp>::Create("bypass_arb", ArbiterType::Priority, 2);
    bypass_switch_->ReqOut.bind(&simobject->MemReqPort);
    simobject->MemRspPort.bind(&bypass_switch_->RspIn);

    if (config.num_banks > 1) {
      mem_switch_ = Switch<MemReq, MemRsp>::Create("mem_arb", ArbiterType::RoundRobin, config.num_banks);
      for (uint32_t i = 0, n = config.num_banks; i < n; ++i) {
        mem_req_ports_.at(i).bind(&mem_switch_->ReqIn.at(i));
        mem_switch_->RspOut.at(i).bind(&mem_rsp_ports_.at(i));
      }
      mem_switch_->ReqOut.bind(&bypass_switch_->ReqIn.at(0));
      bypass_switch_->RspOut.at(0).bind(&mem_switch_->RspIn);
    } else {
      mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
      bypass_switch_->RspOut.at(0).bind(&mem_rsp_ports_.at(0));
    }

    // calculate tag flush cycles
    flush_cycles_ = params_.sets_per_bank * params_.blocks_per_set;
  }

  // Clears all banks and zeroes the statistics and pending counters.
  void reset() {
    for (auto& bank : banks_) {
      bank.clear();
    }
    perf_stats_ = PerfStats();
    pending_read_reqs_  = 0;
    pending_write_reqs_ = 0;
    pending_fill_reqs_  = 0;
  }

  // Advances the model by one cycle.
  void tick() {
    // wait on flush cycles
    if (flush_cycles_ != 0) {
      --flush_cycles_;
      return;
    }

    // per-bank pipeline request
    std::vector<bank_req_t> pipeline_reqs(config_.num_banks, config_.ports_per_bank);

    // calculate memory latency
    perf_stats_.mem_latency += pending_fill_reqs_;

    // handle bypass responses: the low bits of the tag hold the input id
    // that processIORequest() packed in
    auto& bypass_port = bypass_switch_->RspOut.at(1);
    if (!bypass_port.empty()) {
      auto& mem_rsp = bypass_port.front();
      uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
      uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
      MemRsp core_rsp{tag, mem_rsp.core_id, mem_rsp.uuid};
      simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
      DT(3, simobject_->name() << "-" << core_rsp);
      bypass_port.pop();
    }

    // handle MSHR replay
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& bank = banks_.at(bank_id);
      auto& pipeline_req = pipeline_reqs.at(bank_id);
      bank.mshr.pop(&pipeline_req);
    }

    // handle memory fills
    std::vector<bool> pending_fill_req(config_.num_banks, false);
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
      if (!mem_rsp_port.empty()) {
        auto& mem_rsp = mem_rsp_port.front();
        this->processMemoryFill(bank_id, mem_rsp.tag);
        pending_fill_req.at(bank_id) = true;
        mem_rsp_port.pop();
      }
    }

    // handle incoming core requests
    for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
      auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      // check cache bypassing
      if (core_req.non_cacheable) {
        // send IO request
        this->processIORequest(core_req, req_id);

        // remove request
        core_req_port.pop();
        continue;
      }

      auto bank_id = params_.addr_bank_id(core_req.addr);
      auto set_id  = params_.addr_set_id(core_req.addr);
      auto tag     = params_.addr_tag(core_req.addr);
      auto port_id = req_id % config_.ports_per_bank;

      // create bank request
      bank_req_t bank_req(config_.ports_per_bank);
      bank_req.valid = true;
      bank_req.write = core_req.write;
      bank_req.mshr_replay = false;
      bank_req.tag = tag;
      bank_req.set_id = set_id;
      bank_req.core_id = core_req.core_id;
      bank_req.uuid = core_req.uuid;
      bank_req.infos.at(port_id) = {true, req_id, core_req.tag};

      auto& bank = banks_.at(bank_id);
      auto& pipeline_req = pipeline_reqs.at(bank_id);

      // check pending MSHR replay
      if (pipeline_req.valid
       && pipeline_req.mshr_replay) {
        // stall
        continue;
      }

      // check pending fill request
      if (pending_fill_req.at(bank_id)) {
        // stall
        continue;
      }

      // check MSHR capacity if read or writeback
      if ((!core_req.write || !config_.write_through)
       && bank.mshr.full()) {
        ++perf_stats_.mshr_stalls;
        continue;
      }

      // check bank conflicts
      if (pipeline_req.valid) {
        // check port conflict
        if (pipeline_req.write != core_req.write
         || pipeline_req.set_id != set_id
         || pipeline_req.tag != tag
         || pipeline_req.infos[port_id].valid) {
          ++perf_stats_.bank_stalls;
          continue;
        }
        // update pending request infos
        pipeline_req.infos[port_id] = bank_req.infos[port_id];
      } else {
        // schedule new request
        pipeline_req = bank_req;
      }

      if (core_req.write)
        ++perf_stats_.writes;
      else
        ++perf_stats_.reads;

      // remove request
      auto time = core_req_port.pop();
      perf_stats_.pipeline_stalls += (SimPlatform::instance().cycles() - time);
    }

    // process active request
    this->processBankRequest(pipeline_reqs);
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }

private:

  // Sends one core response per occupied port slot of `req`.
  // Unoccupied slots are skipped: they are zero-filled, so responding to
  // them would deliver a spurious response with tag 0 to input 0.
  void sendCoreResponses(const bank_req_t& req) {
    for (auto& info : req.infos) {
      if (!info.valid)
        continue;
      MemRsp core_rsp{info.req_tag, req.core_id, req.uuid};
      simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
      DT(3, simobject_->name() << "-" << core_rsp);
    }
  }

  // Forwards a non-cacheable request on the bypass path. The input id is
  // packed into the low bits of the tag so the response can be routed back.
  void processIORequest(const MemReq& core_req, uint32_t req_id) {
    {
      MemReq mem_req(core_req);
      mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
      bypass_switch_->ReqIn.at(1).send(mem_req, 1);
      DT(3, simobject_->name() << "-" << mem_req);
    }

    if (core_req.write && config_.write_reponse) {
      MemRsp core_rsp{core_req.tag, core_req.core_id, core_req.uuid};
      simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
      DT(3, simobject_->name() << "-" << core_rsp);
    }
  }

  // Installs the filled block and marks the waiting MSHR entries for replay.
  void processMemoryFill(uint32_t bank_id, uint32_t mshr_id) {
    // update block
    auto& bank  = banks_.at(bank_id);
    auto& entry = bank.mshr.replay(mshr_id);
    auto& set   = bank.sets.at(entry.set_id);
    auto& block = set.blocks.at(entry.block_id);
    block.valid = true;
    block.tag   = entry.tag;
    --pending_fill_reqs_;
  }

  // Executes the request selected for each bank this cycle.
  void processBankRequest(const std::vector<bank_req_t>& pipeline_reqs) {
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& pipeline_req = pipeline_reqs.at(bank_id);
      if (!pipeline_req.valid)
        continue;

      auto& bank = banks_.at(bank_id);
      auto& set  = bank.sets.at(pipeline_req.set_id);

      if (pipeline_req.mshr_replay) {
        // send core response
        this->sendCoreResponses(pipeline_req);
      } else {
        bool hit = false;
        bool found_free_block = false;
        uint32_t hit_block_id = 0;
        uint32_t repl_block_id = 0;
        uint32_t max_cnt = 0;

        // tag lookup; also ages the non-matching blocks and picks a
        // replacement candidate
        for (uint32_t i = 0, n = set.blocks.size(); i < n; ++i) {
          auto& block = set.blocks.at(i);
          if (block.valid) {
            if (block.tag == pipeline_req.tag) {
              block.lru_ctr = 0;
              hit_block_id = i;
              hit = true;
            } else {
              ++block.lru_ctr;
            }
            if (max_cnt < block.lru_ctr) {
              max_cnt = block.lru_ctr;
              repl_block_id = i;
            }
          } else {
            found_free_block = true;
            repl_block_id = i;
          }
        }

        if (hit) {
          //
          // Hit handling
          //
          if (pipeline_req.write) {
            // handle write hit
            auto& hit_block = set.blocks.at(hit_block_id);
            if (config_.write_through) {
              // forward write request to memory
              MemReq mem_req;
              mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag);
              mem_req.write = true;
              mem_req.core_id = pipeline_req.core_id;
              mem_req.uuid = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-" << mem_req);
            } else {
              // mark block as dirty
              hit_block.dirty = true;
            }
          }
          // send core response
          if (!pipeline_req.write || config_.write_reponse) {
            this->sendCoreResponses(pipeline_req);
          }
        } else {
          //
          // Miss handling
          //
          if (pipeline_req.write)
            ++perf_stats_.write_misses;
          else
            ++perf_stats_.read_misses;

          if (!found_free_block && !config_.write_through) {
            // write back dirty block
            auto& repl_block = set.blocks.at(repl_block_id);
            if (repl_block.dirty) {
              MemReq mem_req;
              mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag);
              mem_req.write = true;
              mem_req.core_id = pipeline_req.core_id;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-" << mem_req);
              ++perf_stats_.evictions;
            }
          }

          if (pipeline_req.write && config_.write_through) {
            // forward write request to memory
            {
              MemReq mem_req;
              mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
              mem_req.write = true;
              mem_req.core_id = pipeline_req.core_id;
              mem_req.uuid = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-" << mem_req);
            }
            // send core response
            if (config_.write_reponse) {
              this->sendCoreResponses(pipeline_req);
            }
          } else {
            // MSHR lookup (must happen before allocating, otherwise the new
            // entry would match itself)
            int pending = bank.mshr.lookup(pipeline_req);

            // allocate MSHR
            int mshr_id = bank.mshr.allocate(pipeline_req, repl_block_id);

            // send fill request only if no fill for this block is in flight
            if (pending == -1) {
              MemReq mem_req;
              mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
              mem_req.write = false;
              mem_req.tag = mshr_id;
              mem_req.core_id = pipeline_req.core_id;
              mem_req.uuid = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-" << mem_req);
              ++pending_fill_reqs_;
            }
          }
        }
      }
    }
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the cache sim-object: one core request/response port pair per
// configured input, a single memory-side port pair, and the private
// implementation object that carries the actual cache model.
Cache::Cache(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<Cache>(ctx, name),
    CoreReqPorts(config.num_inputs, this),
    CoreRspPorts(config.num_inputs, this),
    MemReqPort(this),
    MemRspPort(this),
    impl_(new Impl(this, config)) {
}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
Cache::~Cache() {
  delete this->impl_;
}
|
||||
|
||||
void Cache::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void Cache::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
// Returns a reference to the counters owned by the implementation object.
const Cache::PerfStats& Cache::perf_stats() const {
  const Cache::PerfStats& stats = this->impl_->perf_stats();
  return stats;
}
|
||||
106
sim/simx/cache_cluster.h
Normal file
106
sim/simx/cache_cluster.h
Normal file
@@ -0,0 +1,106 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cache_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// A group of CacheSim instances shared by several units.
//
// Wiring built by the constructor (all arbiters are round-robin switches):
//   unit u, request i --> unit_arbs[u] (num_requests -> config.num_inputs)
//                     --> mem_arbs[j]  (num_units    -> num_caches), one per cache input j
//                     --> caches_[c]   (input j)
//                     --> cache_arb    (num_caches   -> 1)
//                     --> MemReqPort / MemRspPort
class CacheCluster : public SimObject<CacheCluster> {
public:
  // Indexed as [unit][request].
  std::vector<std::vector<SimPort<MemReq>>> CoreReqPorts;
  std::vector<std::vector<SimPort<MemRsp>>> CoreRspPorts;
  SimPort<MemReq> MemReqPort;
  SimPort<MemRsp> MemRspPort;

  // ctx          - simulation context forwarded to SimObject.
  // name         - base name; used as prefix for every child object created here.
  // num_units    - number of units attached to the cluster.
  // num_caches   - number of cache instances; 0 creates a single instance
  //                configured with bypass enabled.
  // num_requests - number of request ports exposed per unit.
  // config       - configuration applied to every cache instance.
  CacheCluster(const SimContext& ctx,
               const char* name,
               uint32_t num_units,
               uint32_t num_caches,
               uint32_t num_requests,
               const CacheSim::Config& config)
    : SimObject(ctx, name)
    , CoreReqPorts(num_units, std::vector<SimPort<MemReq>>(num_requests, this))
    , CoreRspPorts(num_units, std::vector<SimPort<MemRsp>>(num_requests, this))
    , MemReqPort(this)
    , MemRspPort(this)
    , caches_(MAX(num_caches, 0x1)) {

    // With no caches requested, keep one instance but turn it into a bypass.
    CacheSim::Config config2(config);
    if (0 == num_caches) {
      num_caches = 1;
      config2.bypass = true;
    }

    char sname[100];

    // Per-unit arbiters: fold the unit's request ports onto the cache inputs.
    std::vector<Switch<MemReq, MemRsp>::Ptr> unit_arbs(num_units);
    for (uint32_t u = 0; u < num_units; ++u) {
      // indices are unsigned, so print them with %u
      snprintf(sname, sizeof(sname), "%s-unit-arb-%u", name, u);
      unit_arbs.at(u) = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_requests, config.num_inputs);
      for (uint32_t i = 0; i < num_requests; ++i) {
        this->CoreReqPorts.at(u).at(i).bind(&unit_arbs.at(u)->ReqIn.at(i));
        unit_arbs.at(u)->RspIn.at(i).bind(&this->CoreRspPorts.at(u).at(i));
      }
    }

    // Per-input arbiters: share each cache input among all units.
    std::vector<Switch<MemReq, MemRsp>::Ptr> mem_arbs(config.num_inputs);
    for (uint32_t i = 0; i < config.num_inputs; ++i) {
      snprintf(sname, sizeof(sname), "%s-mem-arb-%u", name, i);
      mem_arbs.at(i) = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_units, num_caches);
      for (uint32_t u = 0; u < num_units; ++u) {
        unit_arbs.at(u)->ReqOut.at(i).bind(&mem_arbs.at(i)->ReqIn.at(u));
        mem_arbs.at(i)->RspIn.at(u).bind(&unit_arbs.at(u)->RspOut.at(i));
      }
    }

    // Memory-side arbiter: merge all caches onto the single memory port pair.
    snprintf(sname, sizeof(sname), "%s-cache-arb", name);
    auto cache_arb = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, num_caches, 1);

    for (uint32_t i = 0; i < num_caches; ++i) {
      snprintf(sname, sizeof(sname), "%s-cache%u", name, i);
      caches_.at(i) = CacheSim::Create(sname, config2);

      for (uint32_t j = 0; j < config.num_inputs; ++j) {
        mem_arbs.at(j)->ReqOut.at(i).bind(&caches_.at(i)->CoreReqPorts.at(j));
        caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
      }

      caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
      cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
    }

    cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
    this->MemRspPort.bind(&cache_arb->RspOut.at(0));
  }

  ~CacheCluster() {}

  // The cluster itself holds no per-cycle state; both hooks are empty.
  void reset() {}

  void tick() {}

  // Returns the sum of the counters of every cache in the cluster.
  CacheSim::PerfStats perf_stats() const {
    CacheSim::PerfStats perf;
    // bind by reference: no need to copy the handle on every iteration
    for (const auto& cache : caches_) {
      perf += cache->perf_stats();
    }
    return perf;
  }

private:
  std::vector<CacheSim::Ptr> caches_;
};
|
||||
|
||||
}
|
||||
707
sim/simx/cache_sim.cpp
Normal file
707
sim/simx/cache_sim.cpp
Normal file
@@ -0,0 +1,707 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cache_sim.h"
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include <util.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct params_t {
|
||||
uint32_t sets_per_bank;
|
||||
uint32_t lines_per_set;
|
||||
uint32_t words_per_line;
|
||||
uint32_t log2_num_inputs;
|
||||
|
||||
uint32_t word_select_addr_start;
|
||||
uint32_t word_select_addr_end;
|
||||
|
||||
uint32_t bank_select_addr_start;
|
||||
uint32_t bank_select_addr_end;
|
||||
|
||||
uint32_t set_select_addr_start;
|
||||
uint32_t set_select_addr_end;
|
||||
|
||||
uint32_t tag_select_addr_start;
|
||||
uint32_t tag_select_addr_end;
|
||||
|
||||
params_t(const CacheSim::Config& config) {
|
||||
int32_t bank_bits = log2ceil(config.num_banks);
|
||||
int32_t offset_bits = config.B - config.W;
|
||||
int32_t log2_bank_size = config.C - bank_bits;
|
||||
int32_t index_bits = log2_bank_size - (config.B + config.A);
|
||||
assert(log2_bank_size > 0);
|
||||
assert(offset_bits >= 0);
|
||||
assert(index_bits >= 0);
|
||||
|
||||
this->log2_num_inputs = log2ceil(config.num_inputs);
|
||||
|
||||
this->words_per_line = 1 << offset_bits;
|
||||
this->lines_per_set = 1 << config.A;
|
||||
this->sets_per_bank = 1 << index_bits;
|
||||
|
||||
assert(config.ports_per_bank <= this->words_per_line);
|
||||
|
||||
// Word select
|
||||
this->word_select_addr_start = config.W;
|
||||
this->word_select_addr_end = (this->word_select_addr_start+offset_bits-1);
|
||||
|
||||
// Bank select
|
||||
this->bank_select_addr_start = (1+this->word_select_addr_end);
|
||||
this->bank_select_addr_end = (this->bank_select_addr_start+bank_bits-1);
|
||||
|
||||
// Set select
|
||||
this->set_select_addr_start = (1+this->bank_select_addr_end);
|
||||
this->set_select_addr_end = (this->set_select_addr_start+index_bits-1);
|
||||
|
||||
// Tag select
|
||||
this->tag_select_addr_start = (1+this->set_select_addr_end);
|
||||
this->tag_select_addr_end = (config.addr_width-1);
|
||||
}
|
||||
|
||||
uint32_t addr_bank_id(uint64_t word_addr) const {
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, bank_select_addr_start, bank_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t addr_set_id(uint64_t word_addr) const {
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
return (uint32_t)bit_getw(word_addr, set_select_addr_start, set_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t addr_tag(uint64_t word_addr) const {
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
return bit_getw(word_addr, tag_select_addr_start, tag_select_addr_end);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t mem_addr(uint32_t bank_id, uint32_t set_id, uint64_t tag) const {
|
||||
uint64_t addr(0);
|
||||
if (bank_select_addr_end >= bank_select_addr_start)
|
||||
addr = bit_setw(addr, bank_select_addr_start, bank_select_addr_end, bank_id);
|
||||
if (set_select_addr_end >= set_select_addr_start)
|
||||
addr = bit_setw(addr, set_select_addr_start, set_select_addr_end, set_id);
|
||||
if (tag_select_addr_end >= tag_select_addr_start)
|
||||
addr = bit_setw(addr, tag_select_addr_start, tag_select_addr_end, tag);
|
||||
return addr;
|
||||
}
|
||||
};
|
||||
|
||||
// State of one cache line (one way of a set).
struct line_t {
  uint64_t tag = 0;      // address tag of the data held by the line
  uint32_t lru_ctr = 0;  // replacement age counter
  bool valid = false;    // line holds data
  bool dirty = false;    // line was written and not yet written back

  // Returns the line to its freshly constructed state. The tag and the
  // replacement counter are reset as well, so that a cache that went through
  // reset behaves exactly like a newly built one.
  void clear() {
    tag = 0;
    lru_ctr = 0;
    valid = false;
    dirty = false;
  }
};
|
||||
|
||||
struct set_t {
|
||||
std::vector<line_t> lines;
|
||||
|
||||
set_t(uint32_t num_ways)
|
||||
: lines(num_ways)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& line : lines) {
|
||||
line.clear();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// One port slot of a bank request: identifies the core-side input and the
// tag of the request that occupies the slot.
struct bank_req_port_t {
  uint32_t req_id;   // index of the core request/response port
  uint64_t req_tag;  // tag to return in the core response
  bool     valid;    // slot is occupied

  // Marks the slot as unoccupied; req_id and req_tag are left untouched.
  void clear() { valid = false; }
};
|
||||
|
||||
struct bank_req_t {
|
||||
|
||||
enum ReqType {
|
||||
None = 0,
|
||||
Fill = 1,
|
||||
Replay = 2,
|
||||
Core = 3
|
||||
};
|
||||
|
||||
std::vector<bank_req_port_t> ports;
|
||||
uint64_t tag;
|
||||
uint32_t set_id;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
ReqType type;
|
||||
bool write;
|
||||
|
||||
bank_req_t(uint32_t num_ports)
|
||||
: ports(num_ports)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& port : ports) {
|
||||
port.clear();
|
||||
}
|
||||
type = ReqType::None;
|
||||
}
|
||||
};
|
||||
|
||||
struct mshr_entry_t {
|
||||
bank_req_t bank_req;
|
||||
uint32_t line_id;
|
||||
|
||||
mshr_entry_t(uint32_t num_ports)
|
||||
: bank_req(num_ports)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
bank_req.clear();
|
||||
}
|
||||
};
|
||||
|
||||
class MSHR {
|
||||
private:
|
||||
std::vector<mshr_entry_t> entries_;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
MSHR(uint32_t size, uint32_t num_ports)
|
||||
: entries_(size, num_ports)
|
||||
, size_(0)
|
||||
{}
|
||||
|
||||
bool empty() const {
|
||||
return (0 == size_);
|
||||
}
|
||||
|
||||
bool full() const {
|
||||
return (size_ == entries_.size());
|
||||
}
|
||||
|
||||
bool lookup(const bank_req_t& bank_req) {
|
||||
for (auto& entry : entries_) {;
|
||||
if (entry.bank_req.type != bank_req_t::None
|
||||
&& entry.bank_req.set_id == bank_req.set_id
|
||||
&& entry.bank_req.tag == bank_req.tag) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int allocate(const bank_req_t& bank_req, uint32_t line_id) {
|
||||
for (uint32_t i = 0, n = entries_.size(); i < n; ++i) {
|
||||
auto& entry = entries_.at(i);
|
||||
if (entry.bank_req.type == bank_req_t::None) {
|
||||
entry.bank_req = bank_req;
|
||||
entry.line_id = line_id;
|
||||
++size_;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
mshr_entry_t& replay(uint32_t id) {
|
||||
auto& root_entry = entries_.at(id);
|
||||
assert(root_entry.bank_req.type == bank_req_t::Core);
|
||||
// mark all related mshr entries for replay
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.bank_req.type == bank_req_t::Core
|
||||
&& entry.bank_req.set_id == root_entry.bank_req.set_id
|
||||
&& entry.bank_req.tag == root_entry.bank_req.tag) {
|
||||
entry.bank_req.type = bank_req_t::Replay;
|
||||
}
|
||||
}
|
||||
return root_entry;
|
||||
}
|
||||
|
||||
bool pop(bank_req_t* out) {
|
||||
for (auto& entry : entries_) {
|
||||
if (entry.bank_req.type == bank_req_t::Replay) {
|
||||
*out = entry.bank_req;
|
||||
entry.bank_req.type = bank_req_t::None;
|
||||
--size_;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (auto& entry : entries_) {
|
||||
entry.clear();
|
||||
}
|
||||
size_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct bank_t {
|
||||
std::vector<set_t> sets;
|
||||
MSHR mshr;
|
||||
|
||||
bank_t(const CacheSim::Config& config,
|
||||
const params_t& params)
|
||||
: sets(params.sets_per_bank, params.lines_per_set)
|
||||
, mshr(config.mshr_size, config.ports_per_bank)
|
||||
{}
|
||||
|
||||
void clear() {
|
||||
for (auto& set : sets) {
|
||||
set.clear();
|
||||
}
|
||||
mshr.clear();
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Private implementation of CacheSim.
//
// In bypass mode the object is only a round-robin switch between the core
// ports and the memory port. Otherwise each bank processes at most one
// pipeline request per tick, selected in priority order: MSHR replay, memory
// fill, then core requests. Requests flagged AddrType::IO skip the banks and
// travel through input 1 of the bypass switch.
class CacheSim::Impl {
private:
  CacheSim* const simobject_;
  Config config_;
  params_t params_;
  std::vector<bank_t> banks_;
  Switch<MemReq, MemRsp>::Ptr bank_switch_;
  Switch<MemReq, MemRsp>::Ptr bypass_switch_;
  std::vector<SimPort<MemReq>> mem_req_ports_;
  std::vector<SimPort<MemRsp>> mem_rsp_ports_;
  std::vector<bank_req_t> pipeline_reqs_;  // one slot per bank, rebuilt every tick
  uint32_t init_cycles_;
  PerfStats perf_stats_;
  uint64_t pending_read_reqs_;
  uint64_t pending_write_reqs_;
  uint64_t pending_fill_reqs_;

public:
  Impl(CacheSim* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
    , params_(config)
    , banks_(config.num_banks, {config, params_})
    , mem_req_ports_(config.num_banks, simobject)
    , mem_rsp_ports_(config.num_banks, simobject)
    , pipeline_reqs_(config.num_banks, config.ports_per_bank)
    // give the counters a defined value even when reset() has not run yet,
    // or when the constructor leaves early in bypass mode
    , init_cycles_(0)
    , pending_read_reqs_(0)
    , pending_write_reqs_(0)
    , pending_fill_reqs_(0)
  {
    char sname[100];
    snprintf(sname, sizeof(sname), "%s-bypass-arb", simobject->name().c_str());

    if (config_.bypass) {
      // pure pass-through: arbitrate the core ports straight onto memory
      bypass_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, config_.num_inputs);
      for (uint32_t i = 0; i < config_.num_inputs; ++i) {
        simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
        bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
      }
      bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
      simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
      return;
    }

    // input 0: traffic from the banks, input 1: bypassed (IO) traffic
    bypass_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::Priority, 2);
    bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
    simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));

    if (config.num_banks > 1) {
      snprintf(sname, sizeof(sname), "%s-bank-arb", simobject->name().c_str());
      bank_switch_ = Switch<MemReq, MemRsp>::Create(sname, ArbiterType::RoundRobin, config.num_banks);
      for (uint32_t i = 0, n = config.num_banks; i < n; ++i) {
        mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
        bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
      }
      bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
      bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
    } else {
      mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
      bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
    }

    // calculate cache initialization cycles
    init_cycles_ = params_.sets_per_bank * params_.lines_per_set;
  }

  // Clears every bank and all counters. Does nothing in bypass mode.
  void reset() {
    if (config_.bypass)
      return;

    for (auto& bank : banks_) {
      bank.clear();
    }
    perf_stats_ = PerfStats();
    pending_read_reqs_  = 0;
    pending_write_reqs_ = 0;
    pending_fill_reqs_  = 0;
  }

  // Advances the cache by one cycle. Does nothing in bypass mode.
  void tick() {
    if (config_.bypass)
      return;

    // wait on cache initialization cycles
    if (init_cycles_ != 0) {
      --init_cycles_;
      return;
    }

    // handle cache bypass responses
    {
      auto& bypass_port = bypass_switch_->RspIn.at(1);
      if (!bypass_port.empty()) {
        auto& mem_rsp = bypass_port.front();
        this->processBypassResponse(mem_rsp);
        bypass_port.pop();
      }
    }

    // initialize pipeline request
    for (auto& pipeline_req : pipeline_reqs_) {
      pipeline_req.clear();
    }

    // schedule MSHR replay (highest priority; leaves the slot empty when
    // nothing is pending replay)
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& bank = banks_.at(bank_id);
      auto& pipeline_req = pipeline_reqs_.at(bank_id);
      bank.mshr.pop(&pipeline_req);
    }

    // schedule memory fill (only when the bank slot is still empty)
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
      if (mem_rsp_port.empty())
        continue;

      auto& pipeline_req = pipeline_reqs_.at(bank_id);
      if (pipeline_req.type != bank_req_t::None)
        continue;

      auto& mem_rsp = mem_rsp_port.front();
      DT(3, simobject_->name() << "-dram-" << mem_rsp);
      pipeline_req.type = bank_req_t::Fill;
      pipeline_req.tag = mem_rsp.tag;  // carries the MSHR entry index
      mem_rsp_port.pop();
    }

    // schedule core requests
    for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
      auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      // check cache bypassing
      if (core_req.type == AddrType::IO) {
        // send bypass request
        this->processBypassRequest(core_req, req_id);
        // remove request
        core_req_port.pop();
        continue;
      }

      auto bank_id = params_.addr_bank_id(core_req.addr);
      auto set_id  = params_.addr_set_id(core_req.addr);
      auto tag     = params_.addr_tag(core_req.addr);
      auto port_id = req_id % config_.ports_per_bank;

      auto& bank = banks_.at(bank_id);
      auto& pipeline_req = pipeline_reqs_.at(bank_id);

      // check MSHR capacity (write-through writes never allocate an entry)
      if ((!core_req.write || !config_.write_through)
       && bank.mshr.full()) {
        ++perf_stats_.mshr_stalls;
        ++perf_stats_.bank_stalls;
        continue;
      }

      // check bank conflicts
      if (pipeline_req.type == bank_req_t::Core) {
        // check port conflict: only a request of the same kind to the same
        // line, on a still-free port, can join the scheduled one
        if (pipeline_req.write != core_req.write
         || pipeline_req.set_id != set_id
         || pipeline_req.tag != tag
         || pipeline_req.ports.at(port_id).valid) {
          ++perf_stats_.bank_stalls;
          continue;
        }
        // extend request ports
        pipeline_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
      } else if (pipeline_req.type == bank_req_t::None) {
        // schedule new request
        bank_req_t bank_req(config_.ports_per_bank);
        bank_req.ports.at(port_id) = bank_req_port_t{req_id, core_req.tag, true};
        bank_req.tag    = tag;
        bank_req.set_id = set_id;
        bank_req.cid    = core_req.cid;
        bank_req.uuid   = core_req.uuid;
        bank_req.type   = bank_req_t::Core;
        bank_req.write  = core_req.write;
        pipeline_req = bank_req;
      } else {
        // bank in use
        ++perf_stats_.bank_stalls;
        continue;
      }

      if (core_req.write)
        ++perf_stats_.writes;
      else
        ++perf_stats_.reads;

      // remove request
      DT(3, simobject_->name() << "-core-" << core_req);
      auto time = core_req_port.pop();
      perf_stats_.pipeline_stalls += (SimPlatform::instance().cycles() - time);
    }

    // process active request
    this->processBankRequests();
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }

private:

  // Routes a bypassed memory response back to the core port encoded in the
  // low bits of its tag (see processBypassRequest).
  void processBypassResponse(const MemRsp& mem_rsp) {
    uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
    uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
    MemRsp core_rsp{tag, mem_rsp.cid, mem_rsp.uuid};
    simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
    DT(3, simobject_->name() << "-core-" << core_rsp);
  }

  // Forwards a core request around the banks. The core port index is packed
  // into the low bits of the memory tag so the response can be routed back.
  void processBypassRequest(const MemReq& core_req, uint32_t req_id) {
    DT(3, simobject_->name() << "-core-" << core_req);

    {
      MemReq mem_req(core_req);
      mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
      bypass_switch_->ReqIn.at(1).send(mem_req, 1);
      DT(3, simobject_->name() << "-dram-" << mem_req);
    }

    if (core_req.write && config_.write_reponse) {
      MemRsp core_rsp{core_req.tag, core_req.cid, core_req.uuid};
      simobject_->CoreRspPorts.at(req_id).send(core_rsp, 1);
      DT(3, simobject_->name() << "-core-" << core_rsp);
    }
  }

  // Executes the request scheduled for each bank during this tick.
  void processBankRequests() {
    for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
      auto& bank = banks_.at(bank_id);
      // read-only view: avoids copying the request (and its port vector)
      const auto& pipeline_req = pipeline_reqs_.at(bank_id);

      switch (pipeline_req.type) {
      case bank_req_t::None:
        break;
      case bank_req_t::Fill: {
        // update cache line
        auto& entry = bank.mshr.replay(pipeline_req.tag);
        auto& set   = bank.sets.at(entry.bank_req.set_id);
        auto& line  = set.lines.at(entry.line_id);
        line.valid = true;
        line.tag   = entry.bank_req.tag;
        // the line was just brought in: make it the youngest of its set
        // rather than inheriting the age of the line it replaced
        line.lru_ctr = 0;
        // NOTE(review): `dirty` is left as it was; the write-back path may
        // need it cleared here and set when a write is replayed - confirm.
        --pending_fill_reqs_;
      } break;
      case bank_req_t::Replay: {
        // send core response
        if (!pipeline_req.write || config_.write_reponse) {
          for (auto& info : pipeline_req.ports) {
            if (!info.valid)
              continue;
            MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
            simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
            DT(3, simobject_->name() << "-core-" << core_rsp);
          }
        }
      } break;
      case bank_req_t::Core: {
        bool hit = false;
        bool found_free_line = false;
        uint32_t hit_line_id = 0;
        uint32_t repl_line_id = 0;
        uint32_t max_cnt = 0;

        auto& set = bank.sets.at(pipeline_req.set_id);

        // tag lookup; also ages the other valid lines and picks the victim
        for (uint32_t i = 0, num_lines = set.lines.size(); i < num_lines; ++i) {
          auto& line = set.lines.at(i);
          if (line.valid) {
            if (line.tag == pipeline_req.tag) {
              line.lru_ctr = 0;
              hit_line_id = i;
              hit = true;
            } else {
              ++line.lru_ctr;
            }
            // a free line always wins over evicting a valid one; this also
            // keeps found_free_line consistent with the chosen victim
            if (!found_free_line && max_cnt < line.lru_ctr) {
              max_cnt = line.lru_ctr;
              repl_line_id = i;
            }
          } else {
            found_free_line = true;
            repl_line_id = i;
          }
        }

        if (hit) {
          //
          // Hit handling
          //
          if (pipeline_req.write) {
            // handle write hit
            auto& hit_line = set.lines.at(hit_line_id);
            if (config_.write_through) {
              // forward write request to memory
              MemReq mem_req;
              mem_req.addr  = params_.mem_addr(bank_id, pipeline_req.set_id, hit_line.tag);
              mem_req.write = true;
              mem_req.cid   = pipeline_req.cid;
              mem_req.uuid  = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-dram-" << mem_req);
            } else {
              // mark line as dirty
              hit_line.dirty = true;
            }
          }
          // send core response
          if (!pipeline_req.write || config_.write_reponse) {
            for (auto& info : pipeline_req.ports) {
              if (!info.valid)
                continue;
              MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
              simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
              DT(3, simobject_->name() << "-core-" << core_rsp);
            }
          }
        } else {
          //
          // Miss handling
          //
          if (pipeline_req.write)
            ++perf_stats_.write_misses;
          else
            ++perf_stats_.read_misses;

          if (!found_free_line && !config_.write_through) {
            // write back dirty line
            auto& repl_line = set.lines.at(repl_line_id);
            if (repl_line.dirty) {
              MemReq mem_req;
              mem_req.addr  = params_.mem_addr(bank_id, pipeline_req.set_id, repl_line.tag);
              mem_req.write = true;
              mem_req.cid   = pipeline_req.cid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-dram-" << mem_req);
              ++perf_stats_.evictions;
            }
          }

          if (pipeline_req.write && config_.write_through) {
            // forward write request to memory
            {
              MemReq mem_req;
              mem_req.addr  = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
              mem_req.write = true;
              mem_req.cid   = pipeline_req.cid;
              mem_req.uuid  = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-dram-" << mem_req);
            }
            // send core response
            if (config_.write_reponse) {
              for (auto& info : pipeline_req.ports) {
                if (!info.valid)
                  continue;
                MemRsp core_rsp{info.req_tag, pipeline_req.cid, pipeline_req.uuid};
                simobject_->CoreRspPorts.at(info.req_id).send(core_rsp, config_.latency);
                DT(3, simobject_->name() << "-core-" << core_rsp);
              }
            }
          } else {
            // MSHR lookup (must run before allocate, which would otherwise
            // make the request find itself)
            auto mshr_pending = bank.mshr.lookup(pipeline_req);

            // allocate MSHR
            auto mshr_id = bank.mshr.allocate(pipeline_req, repl_line_id);

            // send fill request, unless one is already in flight for the line
            if (!mshr_pending) {
              MemReq mem_req;
              mem_req.addr  = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
              mem_req.write = false;
              mem_req.tag   = mshr_id;
              mem_req.cid   = pipeline_req.cid;
              mem_req.uuid  = pipeline_req.uuid;
              mem_req_ports_.at(bank_id).send(mem_req, 1);
              DT(3, simobject_->name() << "-dram-" << mem_req);
              ++pending_fill_reqs_;
            }
          }
        }
      } break;
      }
    }
    // calculate memory latency: every outstanding fill adds one cycle per tick
    perf_stats_.mem_latency += pending_fill_reqs_;
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the cache sim-object: one core request/response port pair per
// configured input, a single memory-side port pair, and the private
// implementation object that carries the actual cache model.
CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<CacheSim>(ctx, name),
    CoreReqPorts(config.num_inputs, this),
    CoreRspPorts(config.num_inputs, this),
    MemReqPort(this),
    MemRspPort(this),
    impl_(new Impl(this, config)) {
}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
CacheSim::~CacheSim() {
  delete this->impl_;
}
|
||||
|
||||
void CacheSim::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void CacheSim::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
// Returns a reference to the counters owned by the implementation object.
const CacheSim::PerfStats& CacheSim::perf_stats() const {
  const CacheSim::PerfStats& stats = this->impl_->perf_stats();
  return stats;
}
|
||||
@@ -1,13 +1,27 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "memsim.h"
|
||||
#include "mem_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Cache : public SimObject<Cache> {
|
||||
class CacheSim : public SimObject<CacheSim> {
|
||||
public:
|
||||
struct Config {
|
||||
bool bypass; // cache bypass
|
||||
uint8_t C; // log2 cache size
|
||||
uint8_t B; // log2 block size
|
||||
uint8_t W; // log2 word size
|
||||
@@ -45,6 +59,19 @@ public:
|
||||
, mshr_stalls(0)
|
||||
, mem_latency(0)
|
||||
{}
|
||||
|
||||
PerfStats& operator+=(const PerfStats& rhs) {
|
||||
this->reads += rhs.reads;
|
||||
this->writes += rhs.writes;
|
||||
this->read_misses += rhs.read_misses;
|
||||
this->write_misses += rhs.write_misses;
|
||||
this->evictions += rhs.evictions;
|
||||
this->pipeline_stalls += rhs.pipeline_stalls;
|
||||
this->bank_stalls += rhs.bank_stalls;
|
||||
this->mshr_stalls += rhs.mshr_stalls;
|
||||
this->mem_latency += rhs.mem_latency;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<SimPort<MemReq>> CoreReqPorts;
|
||||
@@ -52,8 +79,8 @@ public:
|
||||
SimPort<MemReq> MemReqPort;
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
|
||||
Cache(const SimContext& ctx, const char* name, const Config& config);
|
||||
~Cache();
|
||||
CacheSim(const SimContext& ctx, const char* name, const Config& config);
|
||||
~CacheSim();
|
||||
|
||||
void reset();
|
||||
|
||||
222
sim/simx/cluster.cpp
Normal file
222
sim/simx/cluster.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cluster.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Builds one cluster: a cluster-level L2 cache, the instruction-cache and
// data-cache groups, one shared-memory block per core, and the cores
// themselves, then binds all of their request/response ports together.
//
// ctx        - simulation context forwarded to the SimObject base.
// cluster_id - index of this cluster; used to name the child objects and to
//              derive each core's id (cluster_id * num_cores + local index).
// processor  - owning processor; stored and later returned by processor().
// arch       - supplies the core, warp and barrier counts.
// dcrs       - forwarded unchanged to every core that is created.
Cluster::Cluster(const SimContext& ctx,
                 uint32_t cluster_id,
                 ProcessorImpl* processor,
                 const Arch &arch,
                 const DCRS &dcrs)
  : SimObject(ctx, "cluster")
  , mem_req_port(this)
  , mem_rsp_port(this)
  , cluster_id_(cluster_id)
  , cores_(arch.num_cores())
  , barriers_(arch.num_barriers(), 0)
  , sharedmems_(arch.num_cores())
  , processor_(processor)
{
  auto num_cores = arch.num_cores();

  // Scratch buffer for child object names. All ids formatted below are
  // uint32_t, hence the unsigned conversion specifier.
  char sname[100];

  snprintf(sname, sizeof(sname), "cluster%u-l2cache", cluster_id);
  // NOTE(review): the icache/dcache configs below pass a word size in the W
  // slot and the way count in the A slot, whereas here the way count sits in
  // the W slot and A is 0 -- confirm this is intended.
  l2cache_ = CacheSim::Create(sname, CacheSim::Config{
    !L2_ENABLED,
    log2ceil(L2_CACHE_SIZE),  // C
    log2ceil(MEM_BLOCK_SIZE), // B
    log2ceil(L2_NUM_WAYS),    // W
    0,                        // A
    XLEN,                     // address bits
    L2_NUM_BANKS,             // number of banks
    1,                        // number of ports
    5,                        // request size
    true,                     // write-through
    false,                    // write response
    0,                        // victim size
    L2_MSHR_SIZE,             // mshr
    2,                        // pipeline latency
  });

  // The L2 is the cluster's only path to external memory.
  l2cache_->MemReqPort.bind(&this->mem_req_port);
  this->mem_rsp_port.bind(&l2cache_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-icaches", cluster_id);
  icaches_ = CacheCluster::Create(sname, num_cores, NUM_ICACHES, 1, CacheSim::Config{
    !ICACHE_ENABLED,
    log2ceil(ICACHE_SIZE),      // C
    log2ceil(L1_LINE_SIZE),     // B
    log2ceil(sizeof(uint32_t)), // W
    log2ceil(ICACHE_NUM_WAYS),  // A
    XLEN,                       // address bits
    1,                          // number of banks
    1,                          // number of ports
    1,                          // number of inputs
    true,                       // write-through
    false,                      // write response
    0,                          // victim size
    static_cast<uint8_t>(arch.num_warps()), // mshr
    2,                          // pipeline latency
  });

  // Instruction caches use L2 core port 0.
  icaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(0));
  l2cache_->CoreRspPorts.at(0).bind(&icaches_->MemRspPort);

  snprintf(sname, sizeof(sname), "cluster%u-dcaches", cluster_id);
  dcaches_ = CacheCluster::Create(sname, num_cores, NUM_DCACHES, NUM_LSU_LANES, CacheSim::Config{
    !DCACHE_ENABLED,
    log2ceil(DCACHE_SIZE),      // C
    log2ceil(L1_LINE_SIZE),     // B
    log2ceil(sizeof(Word)),     // W
    log2ceil(DCACHE_NUM_WAYS),  // A
    XLEN,                       // address bits
    DCACHE_NUM_BANKS,           // number of banks
    1,                          // number of ports
    DCACHE_NUM_BANKS,           // number of inputs
    true,                       // write-through
    false,                      // write response
    0,                          // victim size
    DCACHE_MSHR_SIZE,           // mshr
    4,                          // pipeline latency
  });

  // Data caches use L2 core port 1.
  dcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(1));
  l2cache_->CoreRspPorts.at(1).bind(&dcaches_->MemRspPort);

  ///////////////////////////////////////////////////////////////////////////

  // create shared memory blocks (one per core)
  for (uint32_t i = 0; i < num_cores; ++i) {
    snprintf(sname, sizeof(sname), "cluster%u-shared_mem%u", cluster_id, i);
    sharedmems_.at(i) = SharedMem::Create(sname, SharedMem::Config{
      (1 << SMEM_LOG_SIZE),
      sizeof(Word),
      NUM_LSU_LANES,
      NUM_LSU_LANES,
      false
    });
  }

  // create cores
  for (uint32_t i = 0; i < num_cores; ++i) {
    uint32_t core_id = cluster_id * num_cores + i;
    cores_.at(i) = Core::Create(core_id,
                                this,
                                arch,
                                dcrs,
                                sharedmems_.at(i));

    // Instruction fetch path: core <-> icache group.
    cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
    icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));

    // Each LSU lane gets its own demux that connects the core's data port to
    // both the dcache group and the core's shared-memory block.
    for (uint32_t j = 0; j < NUM_LSU_LANES; ++j) {
      snprintf(sname, sizeof(sname), "cluster%u-smem_demux%u_%u", cluster_id, i, j);
      auto smem_demux = SMemDemux::Create(sname);

      cores_.at(i)->dcache_req_ports.at(j).bind(&smem_demux->ReqIn);
      smem_demux->RspIn.bind(&cores_.at(i)->dcache_rsp_ports.at(j));

      smem_demux->ReqDc.bind(&dcaches_->CoreReqPorts.at(i).at(j));
      dcaches_->CoreRspPorts.at(i).at(j).bind(&smem_demux->RspDc);

      smem_demux->ReqSm.bind(&sharedmems_.at(i)->Inputs.at(j));
      sharedmems_.at(i)->Outputs.at(j).bind(&smem_demux->RspSm);
    }
  }
}
|
||||
|
||||
// Destructor: performs no explicit work.
Cluster::~Cluster() {}
|
||||
|
||||
void Cluster::reset() {
|
||||
for (auto& barrier : barriers_) {
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Per-cycle hook: the cluster itself does nothing here.
void Cluster::tick() {}
|
||||
|
||||
// Forwards the RAM pointer to every core in this cluster.
//
// ram - memory object handed to each core's attach_ram(); not stored here.
void Cluster::attach_ram(RAM* ram) {
  // Iterate by reference, like the other loops over cores_ in this file,
  // instead of copying each core handle.
  for (const auto& core : cores_) {
    core->attach_ram(ram);
  }
}
|
||||
|
||||
bool Cluster::running() const {
|
||||
for (auto& core : cores_) {
|
||||
if (core->running())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Polls every core for exit.
//
// exitcode   - receives the bitwise OR of the exit codes of the cores that
//              reported exit (0 when none did); always written.
// riscv_test - forwarded unchanged to each core's check_exit().
//
// Returns true only when every core reported exit.
bool Cluster::check_exit(Word* exitcode, bool riscv_test) const {
  Word merged_code = 0;
  bool all_exited = true;
  for (auto& core : cores_) {
    Word core_code;
    if (!core->check_exit(&core_code, riscv_test)) {
      all_exited = false;
      continue;
    }
    merged_code |= core_code;
  }
  *exitcode = merged_code;
  return all_exited;
}
|
||||
|
||||
void Cluster::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
|
||||
uint32_t local_core_id = core_id % cores_.size();
|
||||
barrier.set(local_core_id);
|
||||
|
||||
DP(3, "*** Suspend core #" << core_id << " at barrier #" << bar_id);
|
||||
|
||||
if (barrier.count() == (size_t)count) {
|
||||
// resume all suspended cores
|
||||
for (uint32_t i = 0; i < cores_.size(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
|
||||
cores_.at(i)->resume();
|
||||
}
|
||||
}
|
||||
barrier.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the processor pointer that was passed to the constructor.
ProcessorImpl* Cluster::processor() const { return processor_; }
|
||||
|
||||
// Collects the performance counters of the cluster's caches and shared
// memories into a single PerfStats value.
//
// The constructor in this file creates icaches_, dcaches_ and l2cache_ but
// never tcaches_, ocaches_ or rcaches_, so those handles are checked before
// use; when one is not set, its counters keep their default-constructed
// value.
Cluster::PerfStats Cluster::perf_stats() const {
  Cluster::PerfStats perf;
  perf.icache = icaches_->perf_stats();
  perf.dcache = dcaches_->perf_stats();
  if (tcaches_) {
    perf.tcache = tcaches_->perf_stats();
  }
  if (ocaches_) {
    perf.ocache = ocaches_->perf_stats();
  }
  if (rcaches_) {
    perf.rcache = rcaches_->perf_stats();
  }
  perf.l2cache = l2cache_->perf_stats();

  // Shared memory is per core; accumulate across all of them.
  for (const auto& sharedmem : sharedmems_) {
    perf.sharedmem += sharedmem->perf_stats();
  }

  return perf;
}
|
||||
92
sim/simx/cluster.h
Normal file
92
sim/simx/cluster.h
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "dcrs.h"
|
||||
#include "arch.h"
|
||||
#include "cache_cluster.h"
|
||||
#include "shared_mem.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ProcessorImpl;
|
||||
|
||||
// Simulation object grouping a set of cores with their caches, shared
// memories and cluster-level barriers.
class Cluster : public SimObject<Cluster> {
public:
  // Aggregated performance counters for the cluster's memory components.
  struct PerfStats {
    CacheSim::PerfStats  icache;
    CacheSim::PerfStats  dcache;
    SharedMem::PerfStats sharedmem;
    CacheSim::PerfStats  l2cache;
    CacheSim::PerfStats  tcache;
    CacheSim::PerfStats  ocache;
    CacheSim::PerfStats  rcache;

    // Member-wise accumulation of another PerfStats into this one.
    PerfStats& operator+=(const PerfStats& rhs) {
      this->icache    += rhs.icache;
      this->dcache    += rhs.dcache;
      this->sharedmem += rhs.sharedmem;
      this->l2cache   += rhs.l2cache;
      this->tcache    += rhs.tcache;
      this->ocache    += rhs.ocache;
      this->rcache    += rhs.rcache;
      return *this;
    }
  };

  // Memory-side ports of the cluster.
  SimPort<MemReq> mem_req_port;
  SimPort<MemRsp> mem_rsp_port;

  Cluster(const SimContext& ctx,
          uint32_t cluster_id,
          ProcessorImpl* processor,
          const Arch &arch,
          const DCRS &dcrs);

  ~Cluster();

  void reset();

  void tick();

  // Hands the RAM pointer to the cores of this cluster.
  void attach_ram(RAM* ram);

  // True while at least one core is running.
  bool running() const;

  // True when all cores have exited; *exitcode receives the combined code.
  bool check_exit(Word* exitcode, bool riscv_test) const;

  // Cluster-level barrier shared by the cores of this cluster.
  void barrier(uint32_t bar_id, uint32_t count, uint32_t core_id);

  ProcessorImpl* processor() const;

  Cluster::PerfStats perf_stats() const;

private:
  // NOTE: declaration order is significant -- members are initialized in
  // this order by the constructor's initializer list.
  uint32_t                    cluster_id_;
  std::vector<Core::Ptr>      cores_;
  std::vector<CoreMask>       barriers_;   // one mask per barrier id
  CacheSim::Ptr               l2cache_;
  CacheCluster::Ptr           icaches_;
  CacheCluster::Ptr           dcaches_;
  std::vector<SharedMem::Ptr> sharedmems_; // one entry per core
  CacheCluster::Ptr           tcaches_;
  CacheCluster::Ptr           ocaches_;
  CacheCluster::Ptr           rcaches_;
  ProcessorImpl*              processor_;
};
|
||||
|
||||
} // namespace vortex
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef RAM_PAGE_SIZE
|
||||
@@ -10,14 +23,4 @@
|
||||
|
||||
#ifndef MEMORY_BANKS
|
||||
#define MEMORY_BANKS 2
|
||||
#endif
|
||||
|
||||
namespace vortex {
|
||||
|
||||
enum Constants {
|
||||
|
||||
SMEM_BANK_OFFSET = log2ceil(sizeof(uint32_t)) + log2ceil(STACK_SIZE / sizeof(uint32_t)),
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
153
sim/simx/core.h
153
sim/simx/core.h
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
@@ -11,101 +24,104 @@
|
||||
#include <simobject.h>
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "decode.h"
|
||||
#include "mem.h"
|
||||
#include "warp.h"
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "sharedmem.h"
|
||||
#include "cache_sim.h"
|
||||
#include "shared_mem.h"
|
||||
#include "ibuffer.h"
|
||||
#include "scoreboard.h"
|
||||
#include "exeunit.h"
|
||||
#include "tex_unit.h"
|
||||
#include "operand.h"
|
||||
#include "dispatcher.h"
|
||||
#include "exe_unit.h"
|
||||
#include "dcrs.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Cluster;
|
||||
|
||||
class Core : public SimObject<Core> {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t cycles;
|
||||
uint64_t instrs;
|
||||
uint64_t ibuf_stalls;
|
||||
uint64_t scrb_stalls;
|
||||
uint64_t alu_stalls;
|
||||
uint64_t lsu_stalls;
|
||||
uint64_t csr_stalls;
|
||||
uint64_t fpu_stalls;
|
||||
uint64_t gpu_stalls;
|
||||
uint64_t sfu_stalls;
|
||||
uint64_t ifetches;
|
||||
uint64_t loads;
|
||||
uint64_t stores;
|
||||
uint64_t branches;
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
uint64_t tex_reads;
|
||||
uint64_t tex_latency;
|
||||
uint64_t ifetch_latency;
|
||||
uint64_t load_latency;
|
||||
|
||||
PerfStats()
|
||||
: instrs(0)
|
||||
: cycles(0)
|
||||
, instrs(0)
|
||||
, ibuf_stalls(0)
|
||||
, scrb_stalls(0)
|
||||
, alu_stalls(0)
|
||||
, lsu_stalls(0)
|
||||
, csr_stalls(0)
|
||||
, fpu_stalls(0)
|
||||
, gpu_stalls(0)
|
||||
, sfu_stalls(0)
|
||||
, ifetches(0)
|
||||
, loads(0)
|
||||
, stores(0)
|
||||
, branches(0)
|
||||
, mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
, tex_reads(0)
|
||||
, tex_latency(0)
|
||||
, ifetch_latency(0)
|
||||
, load_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
std::vector<SimPort<MemReq>> icache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> icache_rsp_ports;
|
||||
|
||||
std::vector<SimPort<MemReq>> dcache_req_ports;
|
||||
std::vector<SimPort<MemRsp>> dcache_rsp_ports;
|
||||
|
||||
Core(const SimContext& ctx,
|
||||
uint32_t core_id,
|
||||
Cluster* cluster,
|
||||
const Arch &arch,
|
||||
const DCRS &dcrs,
|
||||
SharedMem::Ptr sharedmem);
|
||||
|
||||
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
|
||||
~Core();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
bool running() const;
|
||||
|
||||
void resume();
|
||||
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
return core_id_;
|
||||
}
|
||||
|
||||
const Decoder& decoder() {
|
||||
return decoder_;
|
||||
}
|
||||
|
||||
const ArchDef& arch() const {
|
||||
const Arch& arch() const {
|
||||
return arch_;
|
||||
}
|
||||
|
||||
const PerfStats& perf_stats() const {
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
uint32_t getIRegValue(int reg) const {
|
||||
return warps_.at(0)->getIRegValue(reg);
|
||||
const DCRS& dcrs() const {
|
||||
return dcrs_;
|
||||
}
|
||||
|
||||
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
|
||||
|
||||
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
|
||||
|
||||
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
|
||||
void wspawn(uint32_t num_warps, Word nextPC);
|
||||
|
||||
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
void barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
|
||||
AddrType get_addr_type(uint64_t addr);
|
||||
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
@@ -113,19 +129,22 @@ public:
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
void dcache_amo_reserve(uint64_t addr);
|
||||
|
||||
bool dcache_amo_check(uint64_t addr);
|
||||
|
||||
void trigger_ecall();
|
||||
|
||||
void trigger_ebreak();
|
||||
|
||||
bool check_exit() const;
|
||||
bool check_exit(Word* exitcode, bool riscv_test) const;
|
||||
|
||||
private:
|
||||
|
||||
void schedule();
|
||||
void fetch();
|
||||
void decode();
|
||||
void issue();
|
||||
void execute();
|
||||
void commit();
|
||||
|
||||
@@ -133,49 +152,55 @@ private:
|
||||
|
||||
void cout_flush();
|
||||
|
||||
uint32_t id_;
|
||||
const ArchDef arch_;
|
||||
uint32_t core_id_;
|
||||
const Arch& arch_;
|
||||
const DCRS &dcrs_;
|
||||
|
||||
const Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
RAM smem_;
|
||||
std::vector<TexUnit> tex_units_;
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<uint32_t> csrs_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
std::vector<Operand::Ptr> operands_;
|
||||
std::vector<Dispatcher::Ptr> dispatchers_;
|
||||
std::vector<ExeUnit::Ptr> exe_units_;
|
||||
Cache::Ptr icache_;
|
||||
Cache::Ptr dcache_;
|
||||
SharedMem::Ptr shared_mem_;
|
||||
Switch<MemReq, MemRsp>::Ptr l1_mem_switch_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> dcache_switch_;
|
||||
SharedMem::Ptr sharedmem_;
|
||||
|
||||
PipelineLatch fetch_latch_;
|
||||
PipelineLatch decode_latch_;
|
||||
|
||||
HashTable<pipeline_trace_t*> pending_icache_;
|
||||
std::vector<pipeline_trace_t*> committed_traces_;
|
||||
WarpMask active_warps_;
|
||||
WarpMask stalled_warps_;
|
||||
uint32_t last_schedule_wid_;
|
||||
uint64_t issued_instrs_;
|
||||
uint64_t committed_instrs_;
|
||||
uint32_t csr_tex_unit_;
|
||||
bool ecall_;
|
||||
bool ebreak_;
|
||||
bool exited_;
|
||||
|
||||
uint64_t pending_ifetches_;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
std::vector<std::vector<CSRs>> csrs_;
|
||||
|
||||
PerfStats perf_stats_;
|
||||
uint64_t perf_mem_pending_reads_;
|
||||
|
||||
Cluster* cluster_;
|
||||
|
||||
uint32_t commit_exe_;
|
||||
|
||||
friend class Warp;
|
||||
friend class LsuUnit;
|
||||
friend class AluUnit;
|
||||
friend class CsrUnit;
|
||||
friend class FpuUnit;
|
||||
friend class GpuUnit;
|
||||
friend class SfuUnit;
|
||||
friend class TexUnit;
|
||||
friend class RasterAgent;
|
||||
friend class RopAgent;
|
||||
friend class TexAgent;
|
||||
};
|
||||
|
||||
} // namespace vortex
|
||||
} // namespace vortex
|
||||
|
||||
28
sim/simx/dcrs.cpp
Normal file
28
sim/simx/dcrs.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "dcrs.h"
|
||||
#include <iostream>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
void DCRS::write(uint32_t addr, uint32_t value) {
|
||||
if (addr >= VX_DCR_BASE_STATE_BEGIN
|
||||
&& addr < VX_DCR_BASE_STATE_END) {
|
||||
base_dcrs.write(addr, value);
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << std::hex << "Error: invalid global DCR addr=0x" << addr << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
45
sim/simx/dcrs.h
Normal file
45
sim/simx/dcrs.h
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <util.h>
|
||||
#include <VX_types.h>
|
||||
#include <array>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class BaseDCRS {
|
||||
public:
|
||||
uint32_t read(uint32_t addr) const {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
return states_.at(state);
|
||||
}
|
||||
|
||||
void write(uint32_t addr, uint32_t value) {
|
||||
uint32_t state = VX_DCR_BASE_STATE(addr);
|
||||
states_.at(state) = value;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<uint32_t, VX_DCR_BASE_STATE_COUNT> states_;
|
||||
};
|
||||
|
||||
class DCRS {
|
||||
public:
|
||||
void write(uint32_t addr, uint32_t value);
|
||||
|
||||
BaseDCRS base_dcrs;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef DEBUG_LEVEL
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
@@ -9,41 +22,36 @@
|
||||
#include "debug.h"
|
||||
#include "types.h"
|
||||
#include "decode.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "instr.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
struct InstTableEntry_t {
|
||||
bool controlFlow;
|
||||
InstType iType;
|
||||
};
|
||||
|
||||
static const std::unordered_map<Opcode, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::NOP, {false, InstType::N_TYPE}},
|
||||
{Opcode::R_INST, {false, InstType::R_TYPE}},
|
||||
{Opcode::L_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::I_INST, {false, InstType::I_TYPE}},
|
||||
{Opcode::S_INST, {false, InstType::S_TYPE}},
|
||||
{Opcode::B_INST, {true , InstType::B_TYPE}},
|
||||
{Opcode::LUI_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::AUIPC_INST, {false, InstType::U_TYPE}},
|
||||
{Opcode::JAL_INST, {true , InstType::J_TYPE}},
|
||||
{Opcode::JALR_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::SYS_INST, {true , InstType::I_TYPE}},
|
||||
{Opcode::FENCE, {true , InstType::I_TYPE}},
|
||||
{Opcode::FL, {false, InstType::I_TYPE}},
|
||||
{Opcode::FS, {false, InstType::S_TYPE}},
|
||||
{Opcode::FCI, {false, InstType::R_TYPE}},
|
||||
{Opcode::FMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMADD, {false, InstType::R4_TYPE}},
|
||||
{Opcode::FMNMSUB, {false, InstType::R4_TYPE}},
|
||||
{Opcode::VSET, {false, InstType::V_TYPE}},
|
||||
{Opcode::GPGPU, {false, InstType::R_TYPE}},
|
||||
{Opcode::GPU, {false, InstType::R4_TYPE}},
|
||||
{Opcode::R_INST_W, {false, InstType::R_TYPE}},
|
||||
{Opcode::I_INST_W, {false, InstType::I_TYPE}},
|
||||
static const std::unordered_map<Opcode, InstType> sc_instTable = {
|
||||
{Opcode::R_INST, InstType::R_TYPE},
|
||||
{Opcode::L_INST, InstType::I_TYPE},
|
||||
{Opcode::I_INST, InstType::I_TYPE},
|
||||
{Opcode::S_INST, InstType::S_TYPE},
|
||||
{Opcode::B_INST, InstType::B_TYPE},
|
||||
{Opcode::LUI_INST, InstType::U_TYPE},
|
||||
{Opcode::AUIPC_INST, InstType::U_TYPE},
|
||||
{Opcode::JAL_INST, InstType::J_TYPE},
|
||||
{Opcode::JALR_INST, InstType::I_TYPE},
|
||||
{Opcode::SYS_INST, InstType::I_TYPE},
|
||||
{Opcode::FENCE, InstType::I_TYPE},
|
||||
{Opcode::AMO, InstType::R_TYPE},
|
||||
{Opcode::FL, InstType::I_TYPE},
|
||||
{Opcode::FS, InstType::S_TYPE},
|
||||
{Opcode::FCI, InstType::R_TYPE},
|
||||
{Opcode::FMADD, InstType::R4_TYPE},
|
||||
{Opcode::FMSUB, InstType::R4_TYPE},
|
||||
{Opcode::FMNMADD, InstType::R4_TYPE},
|
||||
{Opcode::FMNMSUB, InstType::R4_TYPE},
|
||||
{Opcode::VSET, InstType::V_TYPE},
|
||||
{Opcode::EXT1, InstType::R_TYPE},
|
||||
{Opcode::EXT2, InstType::R4_TYPE},
|
||||
{Opcode::R_INST_W, InstType::R_TYPE},
|
||||
{Opcode::I_INST_W, InstType::I_TYPE},
|
||||
};
|
||||
|
||||
enum Constants {
|
||||
@@ -58,6 +66,8 @@ enum Constants {
|
||||
width_i_imm = 12,
|
||||
width_j_imm = 20,
|
||||
width_v_imm = 11,
|
||||
width_aq = 1,
|
||||
width_rl = 1,
|
||||
|
||||
shift_opcode= 0,
|
||||
shift_rd = width_opcode,
|
||||
@@ -72,15 +82,15 @@ enum Constants {
|
||||
shift_func6 = shift_func7 + width_vmask,
|
||||
shift_vset = shift_func7 + width_func6,
|
||||
|
||||
mask_opcode = (1<<width_opcode)-1,
|
||||
mask_reg = (1<<width_reg)-1,
|
||||
mask_func2 = (1<<width_func2)-1,
|
||||
mask_func3 = (1<<width_func3)-1,
|
||||
mask_func6 = (1<<width_func6)-1,
|
||||
mask_func7 = (1<<width_func7)-1,
|
||||
mask_i_imm = (1<<width_i_imm)-1,
|
||||
mask_j_imm = (1<<width_j_imm)-1,
|
||||
mask_v_imm = (1<<width_v_imm)-1,
|
||||
mask_opcode = (1 << width_opcode) - 1,
|
||||
mask_reg = (1 << width_reg) - 1,
|
||||
mask_func2 = (1 << width_func2) - 1,
|
||||
mask_func3 = (1 << width_func3) - 1,
|
||||
mask_func6 = (1 << width_func6) - 1,
|
||||
mask_func7 = (1 << width_func7) - 1,
|
||||
mask_i_imm = (1 << width_i_imm) - 1,
|
||||
mask_j_imm = (1 << width_j_imm) - 1,
|
||||
mask_v_imm = (1 << width_v_imm) - 1,
|
||||
};
|
||||
|
||||
static const char* op_string(const Instr &instr) {
|
||||
@@ -92,7 +102,6 @@ static const char* op_string(const Instr &instr) {
|
||||
auto imm = instr.getImm();
|
||||
|
||||
switch (opcode) {
|
||||
case Opcode::NOP: return "NOP";
|
||||
case Opcode::LUI_INST: return "LUI";
|
||||
case Opcode::AUIPC_INST: return "AUIPC";
|
||||
case Opcode::R_INST:
|
||||
@@ -116,7 +125,7 @@ static const char* op_string(const Instr &instr) {
|
||||
case 2: return "SLT";
|
||||
case 3: return "SLTU";
|
||||
case 4: return "XOR";
|
||||
case 5: return func7 ? "SRA" : "SRL";
|
||||
case 5: return (func7 & 0x20) ? "SRA" : "SRL";
|
||||
case 6: return "OR";
|
||||
case 7: return "AND";
|
||||
default:
|
||||
@@ -130,7 +139,7 @@ static const char* op_string(const Instr &instr) {
|
||||
case 2: return "SLTI";
|
||||
case 3: return "SLTIU";
|
||||
case 4: return "XORI";
|
||||
case 5: return func7 ? "SRAI" : "SRLI";
|
||||
case 5: return (func7 & 0x20) ? "SRAI" : "SRLI";
|
||||
case 6: return "ORI";
|
||||
case 7: return "ANDI";
|
||||
default:
|
||||
@@ -151,8 +160,8 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::JALR_INST: return "JALR";
|
||||
case Opcode::L_INST:
|
||||
switch (func3) {
|
||||
case 0: return "LBI";
|
||||
case 1: return "LHI";
|
||||
case 0: return "LB";
|
||||
case 1: return "LH";
|
||||
case 2: return "LW";
|
||||
case 3: return "LD";
|
||||
case 4: return "LBU";
|
||||
@@ -192,11 +201,11 @@ static const char* op_string(const Instr &instr) {
|
||||
}
|
||||
case Opcode::I_INST_W:
|
||||
switch (func3) {
|
||||
case 0: return "ADDIW";
|
||||
case 1: return "SLLIW";
|
||||
case 5: return func7 ? "SRAIW" : "SRLIW";
|
||||
default:
|
||||
std::abort();
|
||||
case 0: return "ADDIW";
|
||||
case 1: return "SLLIW";
|
||||
case 5: return func7 ? "SRAIW" : "SRLIW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::SYS_INST:
|
||||
switch (func3) {
|
||||
@@ -222,20 +231,59 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL:
|
||||
switch (func3) {
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
std::abort();
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FS:
|
||||
switch (func3) {
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::AMO: {
|
||||
auto amo_type = func7 >> 2;
|
||||
switch (func3) {
|
||||
case 0x2:
|
||||
switch (amo_type) {
|
||||
case 0x00: return "AMOADD.W";
|
||||
case 0x01: return "AMOSWAP.W";
|
||||
case 0x02: return "LR.W";
|
||||
case 0x03: return "SC.W";
|
||||
case 0x04: return "AMOXOR.W";
|
||||
case 0x08: return "AMOOR.W";
|
||||
case 0x0c: return "AMOAND.W";
|
||||
case 0x10: return "AMOMIN.W";
|
||||
case 0x14: return "AMOMAX.W";
|
||||
case 0x18: return "AMOMINU.W";
|
||||
case 0x1c: return "AMOMAXU.W";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x3:
|
||||
switch (amo_type) {
|
||||
case 0x00: return "AMOADD.D";
|
||||
case 0x01: return "AMOSWAP.D";
|
||||
case 0x02: return "LR.D";
|
||||
case 0x03: return "SC.D";
|
||||
case 0x04: return "AMOXOR.D";
|
||||
case 0x08: return "AMOOR.D";
|
||||
case 0x0c: return "AMOAND.D";
|
||||
case 0x10: return "AMOMIN.D";
|
||||
case 0x14: return "AMOMAX.D";
|
||||
case 0x18: return "AMOMINU.D";
|
||||
case 0x1c: return "AMOMAXU.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x00: return "FADD.S";
|
||||
@@ -332,9 +380,9 @@ static const char* op_string(const Instr &instr) {
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.W";
|
||||
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.S";
|
||||
case 0x71: return func3 ? "FCLASS.D" : "FMV.X.D";
|
||||
case 0x78: return "FMV.W.X";
|
||||
case 0x78: return "FMV.S.X";
|
||||
case 0x79: return "FMV.D.X";
|
||||
default:
|
||||
std::abort();
|
||||
@@ -344,23 +392,36 @@ static const char* op_string(const Instr &instr) {
|
||||
case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S";
|
||||
case Opcode::FMNMSUB: return func2 ? "FNMSUB.D" : "FNMSUB.S";
|
||||
case Opcode::VSET: return "VSET";
|
||||
case Opcode::GPGPU:
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
case 5: return "PREFETCH";
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
case 0: return "TMC";
|
||||
case 1: return "WSPAWN";
|
||||
case 2: return "SPLIT";
|
||||
case 3: return "JOIN";
|
||||
case 4: return "BAR";
|
||||
case 5: return "PRED";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 1:
|
||||
switch (func3) {
|
||||
case 0: return "RASTER";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::GPU:
|
||||
case Opcode::EXT2:
|
||||
switch (func3) {
|
||||
case 0: return "TEX";
|
||||
case 0:
|
||||
return "TEX";
|
||||
case 1: {
|
||||
switch (func2) {
|
||||
case 0: return "CMOV";
|
||||
case 1: return "ROP";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
@@ -375,43 +436,36 @@ static const char* op_string(const Instr &instr) {
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
auto opcode = instr.getOpcode();
|
||||
auto func2 = instr.getFunc2();
|
||||
auto opcode = instr.getOpcode();
|
||||
auto func3 = instr.getFunc3();
|
||||
|
||||
os << op_string(instr) << ": ";
|
||||
|
||||
if (opcode == S_INST
|
||||
|| opcode == FS) {
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "] <- ";
|
||||
os << instr.getRSType(1) << std::dec << instr.getRSrc(1);
|
||||
} else
|
||||
if (opcode == L_INST
|
||||
|| opcode == FL) {
|
||||
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
|
||||
os << "M[r" << std::dec << instr.getRSrc(0) << " + 0x" << std::hex << instr.getImm() << "]";
|
||||
} else {
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
|
||||
}
|
||||
uint32_t i = 0;
|
||||
for (; i < instr.getNRSrc(); ++i) {
|
||||
if (i) os << ", ";
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
}
|
||||
if (instr.hasImm()) {
|
||||
if (i) os << ", ";
|
||||
os << "imm=0x" << std::hex << instr.getImm();
|
||||
}
|
||||
if (opcode == GPU && func3 == 0) {
|
||||
os << ", unit=" << std::dec << func2;
|
||||
}
|
||||
os << op_string(instr);
|
||||
|
||||
int sep = 0;
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << instr.getRDType() << std::dec << instr.getRDest();
|
||||
}
|
||||
for (uint32_t i = 0; i < instr.getNRSrc(); ++i) {
|
||||
if (instr.getRSType(i) == RegType::None)
|
||||
continue;
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
}
|
||||
if (instr.hasImm()) {
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getImm();
|
||||
}
|
||||
if (opcode == Opcode::SYS_INST && func3 >= 5) {
|
||||
// CSRs with immediate values
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getRSrc(0);
|
||||
}
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
Decoder::Decoder(const ArchDef&) {}
|
||||
Decoder::Decoder(const Arch&) {}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
auto instr = std::make_shared<Instr>();
|
||||
@@ -434,7 +488,7 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto iType = op_it->second.iType;
|
||||
auto iType = op_it->second;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
if (func3 != 0x2 && func3 != 0x3) {
|
||||
iType = InstType::V_TYPE;
|
||||
@@ -442,57 +496,97 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
|
||||
switch (iType) {
|
||||
case InstType::N_TYPE:
|
||||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
if (op == Opcode::FCI) {
|
||||
switch (func7) {
|
||||
switch (op) {
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x2c: // FSQRT.S
|
||||
case 0x2d: // FSQRT.D
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x50: // FLE.S, FLT.S, FEQ.S
|
||||
case 0x51: // FLE.D, FLT.D, FEQ.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
case 0x60: // FCVT.W.D, FCVT.WU.D, FCVT.L.D, FCVT.LU.D
|
||||
case 0x61: // FCVT.WU.S, FCVT.W.S, FCVT.L.S, FCVT.LU.S
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::None);
|
||||
break;
|
||||
case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU
|
||||
case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::None);
|
||||
break;
|
||||
case 0x70: // FCLASS.S, FMV.X.W
|
||||
case 0x70: // FCLASS.S, FMV.X.S
|
||||
case 0x71: // FCLASS.D, FMV.X.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x78: // FMV.W.X
|
||||
case 0x78: // FMV.S.X
|
||||
case 0x79: // FMV.D.X
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
switch (func3) {
|
||||
case 0: // TMC
|
||||
case 3: // JOIN
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
case 1: // WSPAWN
|
||||
case 4: // BAR
|
||||
case 5: // PRED
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 2: // SPLIT
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
switch (func3) {
|
||||
case 0: // RASTER
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
instr->setFunc7(func7);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FL) {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
} else {
|
||||
@@ -503,15 +597,23 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
switch (op) {
|
||||
case Opcode::SYS_INST:
|
||||
if (func3 != 0) {
|
||||
// RV32I: CSR*
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
}
|
||||
// RV32I: CSR
|
||||
if (func3 >= 5) {
|
||||
// rs1 holds zimm
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::None);
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
}
|
||||
// uint12
|
||||
instr->setImm(code >> shift_rs2);
|
||||
break;
|
||||
case Opcode::FENCE:
|
||||
// uint12
|
||||
instr->setImm(code >> shift_rs2);
|
||||
instr->setDestReg(rd, RegType::None);
|
||||
instr->setSrcReg(0, rs1, RegType::None);
|
||||
break;
|
||||
case Opcode::I_INST:
|
||||
case Opcode::I_INST_W:
|
||||
@@ -538,11 +640,11 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
} break;
|
||||
case InstType::S_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FS) {
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
} else {
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
auto imm = (func7 << width_reg) | rd;
|
||||
@@ -550,8 +652,8 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
} break;
|
||||
|
||||
case InstType::B_TYPE: {
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
auto bit_11 = rd & 0x1;
|
||||
auto bits_4_1 = rd >> 1;
|
||||
@@ -581,8 +683,8 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
case InstType::V_TYPE:
|
||||
switch (op) {
|
||||
case Opcode::VSET: {
|
||||
instr->setDestVReg(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setFunc3(func3);
|
||||
if (func3 == 7) {
|
||||
instr->setImm(!(code >> shift_vset));
|
||||
@@ -593,20 +695,20 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
instr->setVediv((immed >> 4) & 0x3);
|
||||
instr->setVsew((immed >> 2) & 0x3);
|
||||
} else {
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
}
|
||||
} else {
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setFunc6(func6);
|
||||
}
|
||||
} break;
|
||||
|
||||
case Opcode::FL:
|
||||
instr->setDestVReg(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
@@ -614,9 +716,9 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
|
||||
case Opcode::FS:
|
||||
instr->setVs3(rd);
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
@@ -627,16 +729,28 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
}
|
||||
break;
|
||||
case R4_TYPE:
|
||||
if (op == Opcode::GPU) {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->setSrcReg(rs3, RegType::Integer);
|
||||
if (op == Opcode::EXT2) {
|
||||
switch (func3) {
|
||||
case 1:
|
||||
switch (func2) {
|
||||
case 0: // CMOV
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
instr->addSrcReg(rs3, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->setSrcReg(rs3, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
instr->addSrcReg(rs2, RegType::Float);
|
||||
instr->addSrcReg(rs3, RegType::Float);
|
||||
}
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
@@ -5,12 +18,12 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Arch;
|
||||
class Instr;
|
||||
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
Decoder(const Arch &);
|
||||
|
||||
std::shared_ptr<Instr> decode(uint32_t code) const;
|
||||
};
|
||||
|
||||
141
sim/simx/dispatcher.h
Normal file
141
sim/simx/dispatcher.h
Normal file
@@ -0,0 +1,141 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Dispatcher: moves pipeline traces from per-issue-slot queues to the public
// Outputs ports. Each tick() services the `block_size` issue slots of the
// current batch. When pid_count_ != 1 a trace is split into packets: each
// packet is a copy of the trace that keeps only the thread-mask bits of one
// group of `num_lanes` threads, tagged with pid/sop/eop.
class Dispatcher : public SimObject<Dispatcher> {
public:
  // One output port per issue slot.
  std::vector<SimPort<pipeline_trace_t*>> Outputs;

  // ctx        : simulation context forwarded to SimObject.
  // arch       : architecture description (only num_threads() is used here).
  // buf_size   : maximum number of traces push() accepts per issue slot.
  // block_size : number of issue slots serviced together per tick().
  // num_lanes  : number of threads carried by one dispatched packet.
  Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
    : SimObject<Dispatcher>(ctx, "Dispatcher")
    , Outputs(ISSUE_WIDTH, this)
    , Inputs_(ISSUE_WIDTH, this)
    , arch_(arch)
    , queues_(ISSUE_WIDTH, std::queue<pipeline_trace_t*>())
    , buf_size_(buf_size)
    , block_size_(block_size)
    , num_lanes_(num_lanes)
    , batch_count_(ISSUE_WIDTH / block_size)
    , pid_count_(arch.num_threads() / num_lanes)
    , batch_idx_(0)
    , start_p_(block_size, 0)
  {}

  virtual ~Dispatcher() {}

  // Restart from the first batch with every slot at packet 0.
  virtual void reset() {
    batch_idx_ = 0;
    start_p_.assign(block_size_, 0);
  }

  virtual void tick() {
    // Stage 1: hand at most one queued trace per issue slot to its input port.
    for (uint32_t slot = 0; slot < ISSUE_WIDTH; ++slot) {
      auto& pending = queues_.at(slot);
      if (!pending.empty()) {
        Inputs_.at(slot).send(pending.front(), 1);
        pending.pop();
      }
    }

    // Stage 2: service every slot of the current batch.
    uint32_t done_count = 0;
    for (uint32_t b = 0; b < block_size_; ++b) {
      const uint32_t slot = batch_idx_ * block_size_ + b;
      auto& in_port = Inputs_.at(slot);
      if (in_port.empty()) {
        // an idle slot does not hold the batch back
        ++done_count;
        continue;
      }
      auto& out_port = Outputs.at(slot);
      auto trace = in_port.front();

      if (pid_count_ == 1) {
        // the whole thread mask fits in one packet: forward the trace itself
        trace->pid = 0;
        in_port.pop();
        out_port.send(trace, 1);
        DT(3, "pipeline-dispatch: " << *trace);
        ++done_count;
        continue;
      }

      const auto first_pid = start_p_.at(b);
      if (first_pid == -1) {
        // this slot already emitted its last packet; wait for the rest of the batch
        ++done_count;
        continue;
      }

      // Find the first and last active thread at or after the current packet.
      // NOTE(review): if no bit is set in that range, start/end stay -1 and the
      // divisions below operate on -1 — presumably traces always have an
      // active thread here; confirm against the issuer.
      int start(-1), end(-1);
      for (uint32_t j = first_pid * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
        if (trace->tmask.test(j)) {
          if (start == -1)
            start = j;
          end = j;
        }
      }
      // convert thread indices to packet indices
      start /= num_lanes_;
      end /= num_lanes_;

      // Build the packet: a copy of the trace restricted to one lane group.
      auto packet = new pipeline_trace_t(*trace);
      packet->tmask.reset();
      for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
        packet->tmask[j] = trace->tmask[j];
      }
      packet->pid = start;
      packet->sop = (first_pid == 0);

      if (start != end) {
        // more active lane groups remain: resume after this one next tick
        packet->eop = 0;
        start_p_.at(b) = start + 1;
      } else {
        // last packet: retire the source trace, which the copies replace
        packet->eop = 1;
        start_p_.at(b) = -1;
        in_port.pop();
        ++done_count;
        delete trace;
      }
      out_port.send(packet, 1);
      DT(3, "pipeline-dispatch: " << *packet);
    }

    // Advance to the next batch once every slot of this one is done.
    if (done_count == block_size_) {
      batch_idx_ = (batch_idx_ + 1) % batch_count_;
      start_p_.assign(block_size_, 0);
    }
  }

  // Queue a trace for the given issue slot.
  // Returns false, leaving the trace with the caller, when the slot's queue
  // already holds buf_size entries.
  bool push(uint32_t issue_index, pipeline_trace_t* trace) {
    auto& pending = queues_.at(issue_index);
    const bool has_room = (pending.size() < buf_size_);
    if (has_room) {
      pending.push(trace);
    }
    return has_room;
  }

private:
  std::vector<SimPort<pipeline_trace_t*>> Inputs_;    // internal port per issue slot, fed from queues_
  const Arch& arch_;
  std::vector<std::queue<pipeline_trace_t*>> queues_; // traces accepted by push(), per issue slot
  uint32_t buf_size_;     // capacity of each queue
  uint32_t block_size_;   // issue slots per batch
  uint32_t num_lanes_;    // threads per packet
  uint32_t batch_count_;  // ISSUE_WIDTH / block_size
  uint32_t pid_count_;    // num_threads / num_lanes (packets per full trace)
  uint32_t batch_idx_;    // batch serviced by the next tick()
  std::vector<int> start_p_; // per batch slot: next packet index, or -1 when finished
};
|
||||
|
||||
}
|
||||
341
sim/simx/exe_unit.cpp
Normal file
341
sim/simx/exe_unit.cpp
Normal file
@@ -0,0 +1,341 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "exe_unit.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
#include "cache_sim.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Constructs the ALU execution unit, forwarding to ExeUnit with the name "ALU".
AluUnit::AluUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "ALU")
{}
|
||||
|
||||
void AluUnit::tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
switch (trace->alu_type) {
|
||||
case AluType::ARITH:
|
||||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::IMUL:
|
||||
output.send(trace, LATENCY_IMUL+1);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
output.send(trace, XLEN+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->alu_type << ", " << *trace);
|
||||
if (trace->eop && trace->fetch_stall) {
|
||||
assert(core_->stalled_warps_.test(trace->wid));
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.alu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the FPU execution unit, forwarding to ExeUnit with the name "FPU".
FpuUnit::FpuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "FPU")
{}
|
||||
|
||||
void FpuUnit::tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& input = Inputs.at(i);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
switch (trace->fpu_type) {
|
||||
case FpuType::FNCP:
|
||||
output.send(trace, 2);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
output.send(trace, LATENCY_FMA+1);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
output.send(trace, LATENCY_FDIV+1);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
output.send(trace, LATENCY_FSQRT+1);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
output.send(trace, LATENCY_FCVT+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->fpu_type << ", " << *trace);
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the load/store unit with an LSUQ_SIZE-entry pending-request
// table and NUM_LSU_LANES lanes. Every data member is given a defined initial
// value; fence_state_ starts as nullptr (it is only meaningful while
// fence_lock_ is set). Initializers follow the member declaration order.
LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "LSU")
  , pending_rd_reqs_(LSUQ_SIZE)
  , num_lanes_(NUM_LSU_LANES)
  , fence_state_(nullptr)
  , pending_loads_(0)
  , fence_lock_(false)
  , input_idx_(0)
{}
|
||||
|
||||
void LsuUnit::reset() {
|
||||
pending_rd_reqs_.clear();
|
||||
pending_loads_ = 0;
|
||||
fence_lock_ = false;
|
||||
}
|
||||
|
||||
void LsuUnit::tick() {
|
||||
core_->perf_stats_.load_latency += pending_loads_;
|
||||
|
||||
// handle dcache response
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_rsp_ports.at(t);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
DT(3, "dcache-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.count);
|
||||
--entry.count; // track remaining addresses
|
||||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
--pending_loads_;
|
||||
}
|
||||
|
||||
// handle shared memory response
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
auto& smem_rsp_port = core_->sharedmem_->Outputs.at(t);
|
||||
if (smem_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = smem_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.trace;
|
||||
DT(3, "smem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu_type << ", tid=" << t << ", " << *trace);
|
||||
assert(entry.count);
|
||||
--entry.count; // track remaining addresses
|
||||
if (0 == entry.count) {
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
smem_rsp_port.pop();
|
||||
--pending_loads_;
|
||||
}
|
||||
|
||||
if (fence_lock_) {
|
||||
// wait for all pending memory operations to complete
|
||||
if (!pending_rd_reqs_.empty())
|
||||
return;
|
||||
int iw = fence_state_->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
DT(3, "fence-unlock: " << fence_state_);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
int iw = (input_idx_ + i) % ISSUE_WIDTH;
|
||||
auto& input = Inputs.at(iw);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(iw);
|
||||
auto trace = input.front();
|
||||
auto trace_data = std::dynamic_pointer_cast<LsuTraceData>(trace->data);
|
||||
|
||||
auto t0 = trace->pid * num_lanes_;
|
||||
|
||||
if (trace->lsu_type == LsuType::FENCE) {
|
||||
// schedule fence lock
|
||||
fence_state_ = trace;
|
||||
fence_lock_ = true;
|
||||
DT(3, "fence-lock: " << *trace);
|
||||
// remove input
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
break;
|
||||
}
|
||||
|
||||
// check pending queue capacity
|
||||
if (pending_rd_reqs_.full()) {
|
||||
if (!trace->log_once(true)) {
|
||||
DT(3, "*** " << this->name() << "-lsu-queue-stall: " << *trace);
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
trace->log_once(false);
|
||||
}
|
||||
|
||||
bool is_write = (trace->lsu_type == LsuType::STORE);
|
||||
|
||||
// duplicates detection
|
||||
bool is_dup = false;
|
||||
if (trace->tmask.test(t0)) {
|
||||
uint64_t addr_mask = sizeof(uint32_t)-1;
|
||||
uint32_t addr0 = trace_data->mem_addrs.at(0).addr & ~addr_mask;
|
||||
uint32_t matches = 1;
|
||||
for (uint32_t t = 1; t < num_lanes_; ++t) {
|
||||
if (!trace->tmask.test(t0 + t))
|
||||
continue;
|
||||
auto mem_addr = trace_data->mem_addrs.at(t).addr & ~addr_mask;
|
||||
matches += (addr0 == mem_addr);
|
||||
}
|
||||
is_dup = (matches == trace->tmask.count());
|
||||
}
|
||||
|
||||
uint32_t addr_count;
|
||||
if (is_dup) {
|
||||
addr_count = 1;
|
||||
} else {
|
||||
addr_count = trace->tmask.count();
|
||||
}
|
||||
|
||||
auto tag = pending_rd_reqs_.allocate({trace, addr_count});
|
||||
|
||||
for (uint32_t t = 0; t < num_lanes_; ++t) {
|
||||
if (!trace->tmask.test(t0 + t))
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->dcache_req_ports.at(t);
|
||||
auto mem_addr = trace_data->mem_addrs.at(t);
|
||||
auto type = core_->get_addr_type(mem_addr.addr);
|
||||
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.type = type;
|
||||
mem_req.tag = tag;
|
||||
mem_req.cid = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
dcache_req_port.send(mem_req, 2);
|
||||
DT(3, "dcache-req: addr=0x" << std::hex << mem_req.addr << ", tag=" << tag
|
||||
<< ", lsu_type=" << trace->lsu_type << ", tid=" << t << ", addr_type=" << mem_req.type << ", " << *trace);
|
||||
|
||||
++pending_loads_;
|
||||
++core_->perf_stats_.loads;
|
||||
if (is_dup)
|
||||
break;
|
||||
}
|
||||
|
||||
// do not wait on writes
|
||||
if (is_write) {
|
||||
pending_rd_reqs_.release(tag);
|
||||
output.send(trace, 1);
|
||||
++core_->perf_stats_.stores;
|
||||
}
|
||||
|
||||
// remove input
|
||||
auto time = input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
|
||||
break; // single block
|
||||
}
|
||||
++input_idx_;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the SFU execution unit. input_idx_ is the round-robin cursor
// that tick() reads and increments; it must start from a defined value, so
// it is initialized to 0 here.
SfuUnit::SfuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "SFU")
  , input_idx_(0)
{}
|
||||
|
||||
void SfuUnit::tick() {
|
||||
// handle pending responses
|
||||
for (auto pending_rsp : pending_rsps_) {
|
||||
if (pending_rsp->empty())
|
||||
continue;
|
||||
auto trace = pending_rsp->front();
|
||||
if (trace->cid != core_->id())
|
||||
continue;
|
||||
int iw = trace->wid % ISSUE_WIDTH;
|
||||
auto& output = Outputs.at(iw);
|
||||
output.send(trace, 1);
|
||||
pending_rsp->pop();
|
||||
}
|
||||
|
||||
// check input queue
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
int iw = (input_idx_ + i) % ISSUE_WIDTH;
|
||||
auto& input = Inputs.at(iw);
|
||||
if (input.empty())
|
||||
continue;
|
||||
auto& output = Outputs.at(iw);
|
||||
auto trace = input.front();
|
||||
auto sfu_type = trace->sfu_type;
|
||||
bool release_warp = trace->fetch_stall;
|
||||
|
||||
switch (sfu_type) {
|
||||
case SfuType::TMC:
|
||||
case SfuType::WSPAWN:
|
||||
case SfuType::SPLIT:
|
||||
case SfuType::JOIN:
|
||||
case SfuType::PRED:
|
||||
case SfuType::CSRRW:
|
||||
case SfuType::CSRRS:
|
||||
case SfuType::CSRRC:
|
||||
output.send(trace, 1);
|
||||
break;
|
||||
case SfuType::BAR: {
|
||||
output.send(trace, 1);
|
||||
auto trace_data = std::dynamic_pointer_cast<SFUTraceData>(trace->data);
|
||||
if (trace->eop) {
|
||||
core_->barrier(trace_data->bar.id, trace_data->bar.count, trace->wid);
|
||||
}
|
||||
release_warp = false;
|
||||
} break;
|
||||
case SfuType::CMOV:
|
||||
output.send(trace, 3);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
DT(3, "pipeline-execute: op=" << trace->sfu_type << ", " << *trace);
|
||||
if (trace->eop && release_warp) {
|
||||
assert(core_->stalled_warps_.test(trace->wid));
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
|
||||
auto time = input.pop();
|
||||
auto stalls = (SimPlatform::instance().cycles() - time);
|
||||
|
||||
core_->perf_stats_.sfu_stalls += stalls;
|
||||
|
||||
break; // single block
|
||||
}
|
||||
++input_idx_;
|
||||
}
|
||||
@@ -1,8 +1,21 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "pipeline.h"
|
||||
#include "cache.h"
|
||||
#include "cache_sim.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
@@ -10,13 +23,13 @@ class Core;
|
||||
|
||||
class ExeUnit : public SimObject<ExeUnit> {
|
||||
public:
|
||||
SimPort<pipeline_trace_t*> Input;
|
||||
SimPort<pipeline_trace_t*> Output;
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs;
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
|
||||
ExeUnit(const SimContext& ctx, Core* core, const char* name)
|
||||
: SimObject<ExeUnit>(ctx, name)
|
||||
, Input(this)
|
||||
, Output(this)
|
||||
, Inputs(ISSUE_WIDTH, this)
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, core_(core)
|
||||
{}
|
||||
|
||||
@@ -32,28 +45,25 @@ protected:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class NopUnit : public ExeUnit {
|
||||
public:
|
||||
NopUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class LsuUnit : public ExeUnit {
|
||||
private:
|
||||
uint32_t num_threads_;
|
||||
HashTable<std::pair<pipeline_trace_t*, uint32_t>> pending_rd_reqs_;
|
||||
pipeline_trace_t* fence_state_;
|
||||
bool fence_lock_;
|
||||
|
||||
public:
|
||||
LsuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void reset();
|
||||
|
||||
void tick();
|
||||
|
||||
private:
|
||||
struct pending_req_t {
|
||||
pipeline_trace_t* trace;
|
||||
uint32_t count;
|
||||
};
|
||||
HashTable<pending_req_t> pending_rd_reqs_;
|
||||
uint32_t num_lanes_;
|
||||
pipeline_trace_t* fence_state_;
|
||||
uint64_t pending_loads_;
|
||||
bool fence_lock_;
|
||||
uint32_t input_idx_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -67,15 +77,6 @@ public:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class CsrUnit : public ExeUnit {
|
||||
public:
|
||||
CsrUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class FpuUnit : public ExeUnit {
|
||||
public:
|
||||
FpuUnit(const SimContext& ctx, Core*);
|
||||
@@ -85,19 +86,15 @@ public:
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class GpuUnit : public ExeUnit {
|
||||
private:
|
||||
uint32_t num_threads_;
|
||||
HashTable<std::pair<pipeline_trace_t*, uint32_t>> pending_tex_reqs_;
|
||||
|
||||
bool processTexRequest(pipeline_trace_t* trace);
|
||||
|
||||
class SfuUnit : public ExeUnit {
|
||||
public:
|
||||
GpuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void reset();
|
||||
SfuUnit(const SimContext& ctx, Core*);
|
||||
|
||||
void tick();
|
||||
|
||||
private:
|
||||
std::vector<SimPort<pipeline_trace_t*>*> pending_rsps_;
|
||||
uint32_t input_idx_;
|
||||
};
|
||||
|
||||
}
|
||||
1173
sim/simx/execute.cpp
1173
sim/simx/execute.cpp
File diff suppressed because it is too large
Load Diff
@@ -1,383 +0,0 @@
|
||||
#include "exeunit.h"
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <util.h>
|
||||
#include "debug.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Constructs the NOP execution unit, forwarding to ExeUnit with the name "NOP".
NopUnit::NopUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "NOP")
{}
|
||||
|
||||
// Moves at most one trace per call from Input to Output, scheduled with a
// delay of 1. Does nothing when Input is empty.
void NopUnit::tick() {
  if (!Input.empty()) {
    auto pending = Input.front();
    Output.send(pending, 1);
    Input.pop();
  }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the load/store unit.
//  - num_threads_ is read from the core's architecture configuration.
//  - pending_rd_reqs_ is created with LSUQ_SIZE entries.
//  - fence_state_ is nulled explicitly so the pointer never holds an
//    indeterminate value before the first FENCE is scheduled.
//  - fence_lock_ starts cleared (no fence pending).
LsuUnit::LsuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "LSU")
  , num_threads_(core->arch().num_threads())
  , pending_rd_reqs_(LSUQ_SIZE)
  , fence_state_(nullptr)
  , fence_lock_(false)
{}
|
||||
|
||||
// Returns the unit to its idle state: no fence pending and no outstanding
// read requests being tracked.
void LsuUnit::reset() {
  fence_lock_ = false;
  pending_rd_reqs_.clear();
}
|
||||
|
||||
void LsuUnit::tick() {
|
||||
// handle dcache response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_switch_.at(t)->RspOut.at(0);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "dcache-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu.type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
}
|
||||
|
||||
// handle shared memory response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& smem_rsp_port = core_->shared_mem_->Outputs.at(t);
|
||||
if (smem_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = smem_rsp_port.front();
|
||||
auto& entry = pending_rd_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "smem-rsp: tag=" << mem_rsp.tag << ", type=" << trace->lsu.type
|
||||
<< ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_rd_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
smem_rsp_port.pop();
|
||||
}
|
||||
|
||||
if (fence_lock_) {
|
||||
// wait for all pending memory operations to complete
|
||||
if (!pending_rd_reqs_.empty())
|
||||
return;
|
||||
Output.send(fence_state_, 1);
|
||||
fence_lock_ = false;
|
||||
DT(3, "fence-unlock: " << fence_state_);
|
||||
}
|
||||
|
||||
// check input queue
|
||||
if (Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = Input.front();
|
||||
|
||||
if (trace->lsu.type == LsuType::FENCE) {
|
||||
// schedule fence lock
|
||||
fence_state_ = trace;
|
||||
fence_lock_ = true;
|
||||
DT(3, "fence-lock: " << *trace);
|
||||
// remove input
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
return;
|
||||
}
|
||||
|
||||
// check pending queue capacity
|
||||
if (pending_rd_reqs_.full()) {
|
||||
if (!trace->suspend()) {
|
||||
DT(3, "*** lsu-queue-stall: " << *trace);
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
trace->resume();
|
||||
}
|
||||
|
||||
bool is_write = (trace->lsu.type == LsuType::STORE);
|
||||
|
||||
// duplicates detection
|
||||
bool is_dup = false;
|
||||
if (trace->tmask.test(0)) {
|
||||
uint64_t addr_mask = sizeof(uint32_t)-1;
|
||||
uint32_t addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
|
||||
uint32_t matches = 1;
|
||||
for (uint32_t t = 1; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
auto mem_addr = trace->mem_addrs.at(t).at(0).addr & ~addr_mask;
|
||||
matches += (addr0 == mem_addr);
|
||||
}
|
||||
is_dup = (matches == trace->tmask.count());
|
||||
}
|
||||
|
||||
uint32_t valid_addrs = 0;
|
||||
if (is_dup) {
|
||||
valid_addrs = 1;
|
||||
} else {
|
||||
for (auto& mem_addr : trace->mem_addrs) {
|
||||
valid_addrs += mem_addr.size();
|
||||
}
|
||||
}
|
||||
|
||||
auto tag = pending_rd_reqs_.allocate({trace, valid_addrs});
|
||||
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
continue;
|
||||
|
||||
auto& dcache_req_port = core_->dcache_switch_.at(t)->ReqIn.at(0);
|
||||
auto mem_addr = trace->mem_addrs.at(t).at(0);
|
||||
auto type = get_addr_type(mem_addr.addr, mem_addr.size);
|
||||
|
||||
MemReq mem_req;
|
||||
mem_req.addr = mem_addr.addr;
|
||||
mem_req.write = is_write;
|
||||
mem_req.non_cacheable = (type == AddrType::IO);
|
||||
mem_req.tag = tag;
|
||||
mem_req.core_id = trace->cid;
|
||||
mem_req.uuid = trace->uuid;
|
||||
|
||||
if (type == AddrType::Shared) {
|
||||
core_->shared_mem_->Inputs.at(t).send(mem_req, 2);
|
||||
DT(3, "smem-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", " << *trace);
|
||||
} else {
|
||||
dcache_req_port.send(mem_req, 2);
|
||||
DT(3, "dcache-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
|
||||
<< ", type=" << trace->lsu.type << ", tid=" << t << ", nc=" << mem_req.non_cacheable << ", " << *trace);
|
||||
}
|
||||
|
||||
if (is_dup)
|
||||
break;
|
||||
}
|
||||
|
||||
// do not wait on writes
|
||||
if (is_write) {
|
||||
pending_rd_reqs_.release(tag);
|
||||
Output.send(trace, 1);
|
||||
}
|
||||
|
||||
// remove input
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.lsu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the ALU execution unit; everything is delegated to the ExeUnit base,
// which receives the unit name "ALU".
AluUnit::AluUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "ALU")
{}
|
||||
|
||||
void AluUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
switch (trace->alu.type) {
|
||||
case AluType::ARITH:
|
||||
case AluType::BRANCH:
|
||||
case AluType::SYSCALL:
|
||||
case AluType::CMOV:
|
||||
Output.send(trace, 1);
|
||||
break;
|
||||
case AluType::IMUL:
|
||||
Output.send(trace, LATENCY_IMUL+1);
|
||||
break;
|
||||
case AluType::IDIV:
|
||||
Output.send(trace, XLEN+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->alu.type << ", " << *trace);
|
||||
if (trace->fetch_stall) {
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.alu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the CSR execution unit; everything is delegated to the ExeUnit base,
// which receives the unit name "CSR".
CsrUnit::CsrUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "CSR")
{}
|
||||
|
||||
void CsrUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
Output.send(trace, 1);
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.csr_stalls += (SimPlatform::instance().cycles() - time);
|
||||
DT(3, "pipeline-execute: op=CSR, " << *trace);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds the FPU execution unit; everything is delegated to the ExeUnit base,
// which receives the unit name "FPU".
FpuUnit::FpuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "FPU")
{}
|
||||
|
||||
void FpuUnit::tick() {
|
||||
if (Input.empty())
|
||||
return;
|
||||
auto trace = Input.front();
|
||||
switch (trace->fpu.type) {
|
||||
case FpuType::FNCP:
|
||||
Output.send(trace, 2);
|
||||
break;
|
||||
case FpuType::FMA:
|
||||
Output.send(trace, LATENCY_FMA+1);
|
||||
break;
|
||||
case FpuType::FDIV:
|
||||
Output.send(trace, LATENCY_FDIV+1);
|
||||
break;
|
||||
case FpuType::FSQRT:
|
||||
Output.send(trace, LATENCY_FSQRT+1);
|
||||
break;
|
||||
case FpuType::FCVT:
|
||||
Output.send(trace, LATENCY_FCVT+1);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
DT(3, "pipeline-execute: op=" << trace->fpu.type << ", " << *trace);
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Constructs the GPU execution unit: the thread count comes from the core's
// architecture configuration and the texture request table is created with
// TEXQ_SIZE entries.
GpuUnit::GpuUnit(const SimContext& ctx, Core* core)
  : ExeUnit(ctx, core, "GPU"),
    num_threads_(core->arch().num_threads()),
    pending_tex_reqs_(TEXQ_SIZE) {
}
|
||||
|
||||
void GpuUnit::reset() {
|
||||
pending_tex_reqs_.clear();
|
||||
}
|
||||
|
||||
void GpuUnit::tick() {
|
||||
#ifdef EXT_TEX_ENABLE
|
||||
// handle memory response
|
||||
for (uint32_t t = 0; t < num_threads_; ++t) {
|
||||
auto& dcache_rsp_port = core_->dcache_switch_.at(t)->RspOut.at(1);
|
||||
if (dcache_rsp_port.empty())
|
||||
continue;
|
||||
auto& mem_rsp = dcache_rsp_port.front();
|
||||
auto& entry = pending_tex_reqs_.at(mem_rsp.tag);
|
||||
auto trace = entry.first;
|
||||
DT(3, "tex-rsp: tag=" << mem_rsp.tag << ", tid=" << t << ", " << *trace);
|
||||
assert(entry.second);
|
||||
--entry.second; // track remaining blocks
|
||||
if (0 == entry.second) {
|
||||
Output.send(trace, 1);
|
||||
pending_tex_reqs_.release(mem_rsp.tag);
|
||||
}
|
||||
dcache_rsp_port.pop();
|
||||
}
|
||||
#endif
|
||||
|
||||
// check input queue
|
||||
if (Input.empty())
|
||||
return;
|
||||
|
||||
auto trace = Input.front();
|
||||
|
||||
bool issued = false;
|
||||
|
||||
switch (trace->gpu.type) {
|
||||
case GpuType::TMC:
|
||||
Output.send(trace, 1);
|
||||
core_->active_warps_.set(trace->wid, trace->gpu.active_warps.test(trace->wid));
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::WSPAWN:
|
||||
Output.send(trace, 1);
|
||||
core_->active_warps_ = trace->gpu.active_warps;
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::SPLIT:
|
||||
case GpuType::JOIN:
|
||||
Output.send(trace, 1);
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::BAR:
|
||||
Output.send(trace, 1);
|
||||
if (trace->gpu.active_warps != 0)
|
||||
core_->active_warps_ |= trace->gpu.active_warps;
|
||||
else
|
||||
core_->active_warps_.reset(trace->wid);
|
||||
issued = true;
|
||||
break;
|
||||
case GpuType::TEX:
|
||||
if (this->processTexRequest(trace))
|
||||
issued = true;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
||||
if (issued) {
|
||||
DT(3, "pipeline-execute: op=" << trace->gpu.type << ", " << *trace);
|
||||
if (trace->fetch_stall) {
|
||||
core_->stalled_warps_.reset(trace->wid);
|
||||
}
|
||||
auto time = Input.pop();
|
||||
core_->perf_stats_.fpu_stalls += (SimPlatform::instance().cycles() - time);
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to issue the memory reads of a texture instruction.
//
// Returns false (and suspends the trace) when the texture request table is
// full, so the caller leaves the trace at the head of its input queue.
// Otherwise allocates a table entry holding the number of expected
// responses, sends one request per address of every active thread on port 1
// of that thread's dcache switch, updates the texture counters and returns
// true.
bool GpuUnit::processTexRequest(pipeline_trace_t* trace) {
  // check pending queue capacity
  if (pending_tex_reqs_.full()) {
    // suspend() returns the previous stalled flag, so the stall is logged
    // only on the first failed attempt
    if (!trace->suspend()) {
      DT(3, "*** tex-queue-stall: " << *trace);
    }
    return false;
  }
  trace->resume();

  // number of responses to wait for
  uint32_t valid_addrs = 0;
  for (auto& mem_addr : trace->mem_addrs) {
    valid_addrs += mem_addr.size();
  }

  auto tag = pending_tex_reqs_.allocate({trace, valid_addrs});

  // send memory requests
  for (uint32_t t = 0; t < num_threads_; ++t) {
    if (!trace->tmask.test(t))
      continue;

    auto& dcache_req_port = core_->dcache_switch_.at(t)->ReqIn.at(1);
    for (auto& mem_addr : trace->mem_addrs.at(t)) {
      MemReq mem_req;
      mem_req.addr  = mem_addr.addr;
      // Texture fetches are issued as reads. The previous code derived this
      // flag from trace->lsu.type, which is not the active union member for
      // a GPU trace.
      mem_req.write = false;
      mem_req.tag   = tag;
      mem_req.core_id = core_->id();
      mem_req.uuid = trace->uuid;
      dcache_req_port.send(mem_req, 3);
      // Log the trace contents rather than the pointer value.
      DT(3, "tex-req: addr=" << std::hex << mem_addr.addr << ", tag=" << tag
          << ", tid=" << t << ", "<< *trace);
      ++core_->perf_stats_.tex_reads;
      // Plain accumulation: the stray pre-increment that used to precede
      // this expression added one extra unit per request.
      core_->perf_stats_.tex_latency += pending_tex_reqs_.size();
    }
  }

  return true;
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
@@ -6,10 +19,6 @@
|
||||
namespace vortex {
|
||||
|
||||
class IBuffer {
|
||||
private:
|
||||
std::queue<pipeline_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
|
||||
public:
|
||||
IBuffer(uint32_t size)
|
||||
: capacity_(size)
|
||||
@@ -39,6 +48,10 @@ public:
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::swap(entries_, empty );
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<pipeline_trace_t*> entries_;
|
||||
uint32_t capacity_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
@@ -7,7 +20,7 @@ namespace vortex {
|
||||
class Warp;
|
||||
|
||||
enum Opcode {
|
||||
NOP = 0,
|
||||
NONE = 0,
|
||||
R_INST = 0x33,
|
||||
L_INST = 0x3,
|
||||
I_INST = 0x13,
|
||||
@@ -19,6 +32,7 @@ enum Opcode {
|
||||
JALR_INST = 0x67,
|
||||
SYS_INST = 0x73,
|
||||
FENCE = 0x0f,
|
||||
AMO = 0x2f,
|
||||
// F Extension
|
||||
FL = 0x7,
|
||||
FS = 0x27,
|
||||
@@ -26,19 +40,20 @@ enum Opcode {
|
||||
FMADD = 0x43,
|
||||
FMSUB = 0x47,
|
||||
FMNMSUB = 0x4b,
|
||||
FMNMADD = 0x4f,
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// GPGPU Extension
|
||||
GPGPU = 0x6b,
|
||||
GPU = 0x5b,
|
||||
// RV64 Standard Extensions
|
||||
FMNMADD = 0x4f,
|
||||
// RV64 Standard Extension
|
||||
R_INST_W = 0x3b,
|
||||
I_INST_W = 0x1b,
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// Custom Extensions
|
||||
EXT1 = 0x0b,
|
||||
EXT2 = 0x2b,
|
||||
EXT3 = 0x5b,
|
||||
EXT4 = 0x7b
|
||||
};
|
||||
|
||||
enum InstType {
|
||||
N_TYPE,
|
||||
enum InstType {
|
||||
R_TYPE,
|
||||
I_TYPE,
|
||||
S_TYPE,
|
||||
@@ -52,25 +67,45 @@ enum InstType {
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
: opcode_(Opcode::NOP)
|
||||
: opcode_(Opcode::NONE)
|
||||
, num_rsrcs_(0)
|
||||
, has_imm_(false)
|
||||
, rdest_type_(RegType::None)
|
||||
, imm_(0)
|
||||
, rdest_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func6_(0)
|
||||
, func7_(0) {
|
||||
, func7_(0)
|
||||
, vmask_(0)
|
||||
, vlsWidth_(0)
|
||||
, vMop_(0)
|
||||
, vNf_(0)
|
||||
, vs3_(0)
|
||||
, vlmul_(0)
|
||||
, vsew_(0)
|
||||
, vediv_(0) {
|
||||
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = RegType::None;
|
||||
rsrc_[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(uint32_t destReg, RegType type) { rdest_type_ = type; rdest_ = destReg; }
|
||||
void setSrcReg(uint32_t srcReg, RegType type) { rsrc_type_[num_rsrcs_] = type; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestVReg(uint32_t destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
|
||||
void setSrcVReg(uint32_t srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestReg(uint32_t destReg, RegType type) {
|
||||
rdest_type_ = type;
|
||||
rdest_ = destReg;
|
||||
}
|
||||
void addSrcReg(uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[num_rsrcs_] = type;
|
||||
rsrc_[num_rsrcs_] = srcReg;
|
||||
++num_rsrcs_;
|
||||
}
|
||||
void setSrcReg(uint32_t index, uint32_t srcReg, RegType type) {
|
||||
rsrc_type_[index] = type;
|
||||
rsrc_[index] = srcReg;
|
||||
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
|
||||
}
|
||||
void setFunc2(uint32_t func2) { func2_ = func2; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; }
|
||||
void setFunc7(uint32_t func7) { func7_ = func7; }
|
||||
@@ -85,17 +120,17 @@ public:
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; }
|
||||
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
uint32_t getFunc2() const { return func2_; }
|
||||
uint32_t getFunc3() const { return func3_; }
|
||||
uint32_t getFunc6() const { return func6_; }
|
||||
uint32_t getFunc7() const { return func7_; }
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
uint32_t getRDest() const { return rdest_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
uint32_t getVlsWidth() const { return vlsWidth_; }
|
||||
uint32_t getVmop() const { return vMop_; }
|
||||
|
||||
@@ -1,98 +1,132 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include "processor.h"
|
||||
#include "archdef.h"
|
||||
#include "mem.h"
|
||||
#include "constants.h"
|
||||
#include <util.h>
|
||||
#include "args.h"
|
||||
#include "core.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Prints the one-line command-line synopsis to stdout. The listed options
// mirror the option string handled by parse_args(), including -g for the
// cluster count.
static void show_usage() {
  std::cout << "Usage: [-c <cores>] [-w <warps>] [-t <threads>] [-g <clusters>] [-r: riscv-test] [-s: stats] [-h: help] <program>" << std::endl;
}
|
||||
|
||||
uint32_t num_threads = NUM_THREADS;
|
||||
uint32_t num_warps = NUM_WARPS;
|
||||
uint32_t num_cores = NUM_CORES;
|
||||
uint32_t num_clusters = NUM_CLUSTERS;
|
||||
bool showStats = false;;
|
||||
bool riscv_test = false;
|
||||
const char* program = nullptr;
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "t:w:c:g:rsh?")) != -1) {
|
||||
switch (c) {
|
||||
case 't':
|
||||
num_threads = atoi(optarg);
|
||||
break;
|
||||
case 'w':
|
||||
num_warps = atoi(optarg);
|
||||
break;
|
||||
case 'c':
|
||||
num_cores = atoi(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
num_clusters = atoi(optarg);
|
||||
break;
|
||||
case 'r':
|
||||
riscv_test = true;
|
||||
break;
|
||||
case 's':
|
||||
showStats = true;
|
||||
break;
|
||||
case 'h':
|
||||
case '?':
|
||||
show_usage();
|
||||
exit(0);
|
||||
break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (optind < argc) {
|
||||
program = argv[optind];
|
||||
std::cout << "Running " << program << "..." << std::endl;
|
||||
} else {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int exitcode = 0;
|
||||
|
||||
std::string imgFileName;
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
bool showHelp(false);
|
||||
bool showStats(false);
|
||||
bool riscv_test(false);
|
||||
parse_args(argc, argv);
|
||||
|
||||
// parse the command line arguments
|
||||
CommandLineArgFlag fh("-h", "--help", "show command line options", showHelp);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "program binary", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "number of cores", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "number of warps", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "number of threads", num_threads);
|
||||
CommandLineArgFlag fr("-r", "--riscv", "enable riscv tests", riscv_test);
|
||||
CommandLineArgFlag fs("-s", "--stats", "show stats", showStats);
|
||||
|
||||
CommandLineArg::readArgs(argc - 1, argv + 1);
|
||||
|
||||
if (showHelp || imgFileName.empty()) {
|
||||
std::cout << "Vortex emulator command line arguments:\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -r, --riscv riscv test\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::cout << "Running " << imgFileName << "..." << std::endl;
|
||||
|
||||
{
|
||||
// create processor configuration
|
||||
ArchDef arch(num_cores, num_warps, num_threads);
|
||||
Arch arch(num_threads, num_warps, num_cores, num_clusters);
|
||||
|
||||
// create memory module
|
||||
RAM ram(RAM_PAGE_SIZE);
|
||||
|
||||
// create processor
|
||||
Processor processor(arch);
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
// setup base DCRs
|
||||
const uint64_t startup_addr(STARTUP_ADDR);
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
|
||||
#if (XLEN == 64)
|
||||
processor.write_dcr(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32);
|
||||
#endif
|
||||
processor.write_dcr(VX_DCR_BASE_MPM_CLASS, 0);
|
||||
|
||||
// load program
|
||||
{
|
||||
std::string program_ext(fileExtension(imgFileName.c_str()));
|
||||
{
|
||||
std::string program_ext(fileExtension(program));
|
||||
if (program_ext == "bin") {
|
||||
ram.loadBinImage(imgFileName.c_str(), STARTUP_ADDR);
|
||||
ram.loadBinImage(program, startup_addr);
|
||||
} else if (program_ext == "hex") {
|
||||
ram.loadHexImage(imgFileName.c_str());
|
||||
ram.loadHexImage(program);
|
||||
} else {
|
||||
std::cout << "*** error: only *.bin or *.hex images supported." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// create processor
|
||||
Processor processor(arch);
|
||||
|
||||
// attach memory module
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
// run simulation
|
||||
exitcode = processor.run();
|
||||
exitcode = processor.run(riscv_test);
|
||||
}
|
||||
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
|
||||
if (riscv_test) {
|
||||
if (1 == exitcode) {
|
||||
std::cout << "Passed." << std::endl;
|
||||
exitcode = 0;
|
||||
} else {
|
||||
std::cout << "Failed." << std::endl;
|
||||
}
|
||||
} else {
|
||||
if (exitcode != 0) {
|
||||
std::cout << "*** error: exitcode=" << exitcode << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,17 @@
|
||||
#include "memsim.h"
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mem_sim.h"
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <stdlib.h>
|
||||
@@ -83,7 +96,7 @@ public:
|
||||
mem_req.addr,
|
||||
mem_req.write ? ramulator::Request::Type::WRITE : ramulator::Request::Type::READ,
|
||||
std::bind(&Impl::dram_callback, this, placeholders::_1, mem_req.tag, mem_req.uuid),
|
||||
mem_req.core_id
|
||||
mem_req.cid
|
||||
);
|
||||
|
||||
if (!dram_->send(dram_req))
|
||||
@@ -1,8 +1,20 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "types.h"
|
||||
#include <vector>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
61
sim/simx/operand.h
Normal file
61
sim/simx/operand.h
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Operand-collection stage: takes one trace per cycle from Input and forwards
// it to Output after a delay that grows with the number of register indices
// the trace reads.
class Operand : public SimObject<Operand> {
public:
  SimPort<pipeline_trace_t*> Input;
  SimPort<pipeline_trace_t*> Output;

  Operand(const SimContext& ctx)
    : SimObject<Operand>(ctx, "Operand")
    , Input(this)
    , Output(this)
  {}

  virtual ~Operand() {}

  virtual void reset() {}

  // Forwards the trace at the head of Input, if any.
  //
  // The delay starts at 1 and is incremented once per register index that is
  // flagged in any of the trace's used_iregs / used_fregs / used_vregs masks.
  // Integer register 0 is not charged; a float or vector register at index 0
  // still is, even when integer register 0 is flagged as well.
  virtual void tick() {
    if (Input.empty())
      return;
    auto trace = Input.front();

    int delay = 1;
    for (int i = 0; i < MAX_NUM_REGS; ++i) {
      // only integer register 0 is exempt from the read cost
      const bool reads_ireg = trace->used_iregs.test(i) && (i != 0);
      const bool reads_freg = trace->used_fregs.test(i);
      const bool reads_vreg = trace->used_vregs.test(i);
      if (reads_ireg || reads_freg || reads_vreg) {
        ++delay;
      }
    }

    Output.send(trace, delay);

    DT(3, "pipeline-operands: " << *trace);

    Input.pop();
  }
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
@@ -5,14 +18,38 @@
|
||||
#include <iostream>
|
||||
#include <util.h>
|
||||
#include "types.h"
|
||||
#include "archdef.h"
|
||||
#include "arch.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Polymorphic base for per-instruction payloads; the virtual destructor lets
// derived payloads be destroyed through an ITraceData::Ptr.
class ITraceData {
public:
  using Ptr = std::shared_ptr<ITraceData>;

  ITraceData() = default;
  virtual ~ITraceData() = default;
};
|
||||
|
||||
struct LsuTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<LsuTraceData>;
|
||||
std::vector<mem_addr_size_t> mem_addrs;
|
||||
LsuTraceData(uint32_t num_threads) : mem_addrs(num_threads) {}
|
||||
};
|
||||
|
||||
struct SFUTraceData : public ITraceData {
|
||||
using Ptr = std::shared_ptr<SFUTraceData>;
|
||||
struct {
|
||||
uint32_t id;
|
||||
uint32_t count;
|
||||
} bar;
|
||||
SFUTraceData(uint32_t bar_id, uint32_t bar_count) : bar{bar_id, bar_count} {}
|
||||
};
|
||||
|
||||
struct pipeline_trace_t {
|
||||
public:
|
||||
//--
|
||||
uint64_t uuid;
|
||||
const uint64_t uuid;
|
||||
const Arch& arch;
|
||||
|
||||
//--
|
||||
uint32_t cid;
|
||||
@@ -21,12 +58,9 @@ struct pipeline_trace_t {
|
||||
Word PC;
|
||||
|
||||
//--
|
||||
bool fetch_stall;
|
||||
|
||||
//--
|
||||
bool wb;
|
||||
RegType rdest_type;
|
||||
uint32_t rdest;
|
||||
RegType rdest_type;
|
||||
bool wb;
|
||||
|
||||
//--
|
||||
RegMask used_iregs;
|
||||
@@ -36,73 +70,104 @@ struct pipeline_trace_t {
|
||||
//-
|
||||
ExeType exe_type;
|
||||
|
||||
//--
|
||||
std::vector<std::vector<mem_addr_size_t>> mem_addrs;
|
||||
|
||||
//--
|
||||
union {
|
||||
struct {
|
||||
LsuType type;
|
||||
} lsu;
|
||||
struct {
|
||||
AluType type;
|
||||
} alu;
|
||||
struct {
|
||||
FpuType type;
|
||||
} fpu;
|
||||
struct {
|
||||
GpuType type;
|
||||
WarpMask active_warps;
|
||||
} gpu;
|
||||
uint32_t unit_type;
|
||||
LsuType lsu_type;
|
||||
AluType alu_type;
|
||||
FpuType fpu_type;
|
||||
SfuType sfu_type;
|
||||
};
|
||||
|
||||
bool stalled;
|
||||
ITraceData::Ptr data;
|
||||
|
||||
pipeline_trace_t(uint64_t uuid_, const ArchDef& arch) {
|
||||
uuid = uuid_;
|
||||
cid = 0;
|
||||
wid = 0;
|
||||
tmask.reset();
|
||||
PC = 0;
|
||||
fetch_stall = false;
|
||||
wb = false;
|
||||
rdest = 0;
|
||||
rdest_type = RegType::None;
|
||||
used_iregs.reset();
|
||||
used_fregs.reset();
|
||||
used_vregs.reset();
|
||||
exe_type = ExeType::NOP;
|
||||
mem_addrs.resize(arch.num_threads());
|
||||
stalled = false;
|
||||
}
|
||||
int pid;
|
||||
bool sop;
|
||||
bool eop;
|
||||
|
||||
bool suspend() {
|
||||
bool old = stalled;
|
||||
stalled = true;
|
||||
bool fetch_stall;
|
||||
|
||||
pipeline_trace_t(uint64_t uuid, const Arch& arch)
|
||||
: uuid(uuid)
|
||||
, arch(arch)
|
||||
, cid(0)
|
||||
, wid(0)
|
||||
, tmask(0)
|
||||
, PC(0)
|
||||
, rdest(0)
|
||||
, rdest_type(RegType::None)
|
||||
, wb(false)
|
||||
, used_iregs(0)
|
||||
, used_fregs(0)
|
||||
, used_vregs(0)
|
||||
, exe_type(ExeType::ALU)
|
||||
, unit_type(0)
|
||||
, data(nullptr)
|
||||
, pid(-1)
|
||||
, sop(true)
|
||||
, eop(true)
|
||||
, fetch_stall(false)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
pipeline_trace_t(const pipeline_trace_t& rhs)
|
||||
: uuid(rhs.uuid)
|
||||
, arch(rhs.arch)
|
||||
, cid(rhs.cid)
|
||||
, wid(rhs.wid)
|
||||
, tmask(rhs.tmask)
|
||||
, PC(rhs.PC)
|
||||
, rdest(rhs.rdest)
|
||||
, rdest_type(rhs.rdest_type)
|
||||
, wb(rhs.wb)
|
||||
, used_iregs(rhs.used_iregs)
|
||||
, used_fregs(rhs.used_fregs)
|
||||
, used_vregs(rhs.used_vregs)
|
||||
, exe_type(rhs.exe_type)
|
||||
, unit_type(rhs.unit_type)
|
||||
, data(rhs.data)
|
||||
, pid(rhs.pid)
|
||||
, sop(rhs.sop)
|
||||
, eop(rhs.eop)
|
||||
, fetch_stall(rhs.fetch_stall)
|
||||
, log_once_(false)
|
||||
{}
|
||||
|
||||
~pipeline_trace_t() {}
|
||||
|
||||
bool log_once(bool enable) {
|
||||
bool old = log_once_;
|
||||
log_once_ = enable;
|
||||
return old;
|
||||
}
|
||||
|
||||
void resume() {
|
||||
stalled = false;
|
||||
}
|
||||
private:
|
||||
bool log_once_;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const pipeline_trace_t& state) {
|
||||
os << "coreid=" << state.cid << ", wid=" << state.wid << ", PC=" << std::hex << state.PC;
|
||||
os << "cid=" << state.cid;
|
||||
os << ", wid=" << state.wid;
|
||||
os << ", tmask=";
|
||||
for (uint32_t i = 0, n = state.arch.num_threads(); i < n; ++i) {
|
||||
os << state.tmask.test(i);
|
||||
}
|
||||
os << ", PC=0x" << std::hex << state.PC;
|
||||
os << ", wb=" << state.wb;
|
||||
if (state.wb) {
|
||||
os << ", rd=" << state.rdest_type << std::dec << state.rdest;
|
||||
}
|
||||
os << ", ex=" << state.exe_type;
|
||||
if (state.pid != -1) {
|
||||
os << ", pid=" << state.pid;
|
||||
os << ", sop=" << state.sop;
|
||||
os << ", eop=" << state.eop;
|
||||
}
|
||||
os << " (#" << std::dec << state.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
|
||||
class PipelineLatch {
|
||||
protected:
|
||||
const char* name_;
|
||||
std::queue<pipeline_trace_t*> queue_;
|
||||
|
||||
public:
|
||||
PipelineLatch(const char* name = nullptr)
|
||||
: name_(name)
|
||||
@@ -132,6 +197,10 @@ public:
|
||||
std::queue<pipeline_trace_t*> empty;
|
||||
std::swap(queue_, empty );
|
||||
}
|
||||
|
||||
protected:
|
||||
const char* name_;
|
||||
std::queue<pipeline_trace_t*> queue_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,168 +1,141 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "processor.h"
|
||||
#include "core.h"
|
||||
#include "constants.h"
|
||||
#include "processor_impl.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
class Processor::Impl {
|
||||
private:
|
||||
std::vector<Core::Ptr> cores_;
|
||||
std::vector<Cache::Ptr> l2caches_;
|
||||
std::vector<Switch<MemReq, MemRsp>::Ptr> l2_mem_switches_;
|
||||
Cache::Ptr l3cache_;
|
||||
Switch<MemReq, MemRsp>::Ptr l3_mem_switch_;
|
||||
ProcessorImpl::ProcessorImpl(const Arch& arch)
|
||||
: arch_(arch)
|
||||
, clusters_(arch.num_clusters())
|
||||
{
|
||||
SimPlatform::instance().initialize();
|
||||
|
||||
public:
|
||||
Impl(const ArchDef& arch)
|
||||
: cores_(arch.num_cores())
|
||||
, l2caches_(NUM_CLUSTERS)
|
||||
, l2_mem_switches_(NUM_CLUSTERS)
|
||||
{
|
||||
SimPlatform::instance().initialize();
|
||||
// create memory simulator
|
||||
memsim_ = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
uint32_t(arch.num_cores()) * arch.num_clusters()
|
||||
});
|
||||
|
||||
uint32_t num_cores = arch.num_cores();
|
||||
uint32_t cores_per_cluster = num_cores / NUM_CLUSTERS;
|
||||
|
||||
// create cores
|
||||
for (uint32_t i = 0; i < num_cores; ++i) {
|
||||
cores_.at(i) = Core::Create(arch, i);
|
||||
// create L3 cache
|
||||
l3cache_ = CacheSim::Create("l3cache", CacheSim::Config{
|
||||
!L3_ENABLED,
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
log2ceil(L3_NUM_WAYS), // W
|
||||
0, // A
|
||||
XLEN, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
1, // number of ports
|
||||
uint8_t(arch.num_clusters()), // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}
|
||||
);
|
||||
|
||||
// connect L3 memory ports
|
||||
l3cache_->MemReqPort.bind(&memsim_->MemReqPort);
|
||||
memsim_->MemRspPort.bind(&l3cache_->MemRspPort);
|
||||
|
||||
// setup memory simulator
|
||||
auto memsim = MemSim::Create("dram", MemSim::Config{
|
||||
MEMORY_BANKS,
|
||||
arch.num_cores()
|
||||
});
|
||||
|
||||
std::vector<SimPort<MemReq>*> mem_req_ports(1, &memsim->MemReqPort);
|
||||
std::vector<SimPort<MemRsp>*> mem_rsp_ports(1, &memsim->MemRspPort);
|
||||
|
||||
if (L3_ENABLE) {
|
||||
l3cache_ = Cache::Create("l3cache", Cache::Config{
|
||||
log2ceil(L3_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L3_NUM_BANKS, // number of banks
|
||||
L3_NUM_PORTS, // number of ports
|
||||
NUM_CLUSTERS, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L3_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
}
|
||||
);
|
||||
l3cache_->MemReqPort.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3cache_->MemRspPort);
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3cache_->CoreReqPorts.at(i);
|
||||
mem_rsp_ports.at(i) = &l3cache_->CoreRspPorts.at(i);
|
||||
}
|
||||
} else if (NUM_CLUSTERS > 1) {
|
||||
l3_mem_switch_ = Switch<MemReq, MemRsp>::Create("l3_arb", ArbiterType::RoundRobin, NUM_CLUSTERS);
|
||||
l3_mem_switch_->ReqOut.bind(mem_req_ports.at(0));
|
||||
mem_rsp_ports.at(0)->bind(&l3_mem_switch_->RspIn);
|
||||
|
||||
mem_req_ports.resize(NUM_CLUSTERS);
|
||||
mem_rsp_ports.resize(NUM_CLUSTERS);
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
mem_req_ports.at(i) = &l3_mem_switch_->ReqIn.at(i);
|
||||
mem_rsp_ports.at(i) = &l3_mem_switch_->RspOut.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < NUM_CLUSTERS; ++i) {
|
||||
std::vector<SimPort<MemReq>*> cluster_mem_req_ports(cores_per_cluster);
|
||||
std::vector<SimPort<MemRsp>*> cluster_mem_rsp_ports(cores_per_cluster);
|
||||
|
||||
if (L2_ENABLE) {
|
||||
auto& l2cache = l2caches_.at(i);
|
||||
l2cache = Cache::Create("l2cache", Cache::Config{
|
||||
log2ceil(L2_CACHE_SIZE), // C
|
||||
log2ceil(MEM_BLOCK_SIZE), // B
|
||||
2, // W
|
||||
0, // A
|
||||
32, // address bits
|
||||
L2_NUM_BANKS, // number of banks
|
||||
L2_NUM_PORTS, // number of ports
|
||||
(uint8_t)cores_per_cluster, // request size
|
||||
true, // write-through
|
||||
false, // write response
|
||||
0, // victim size
|
||||
L2_MSHR_SIZE, // mshr
|
||||
2, // pipeline latency
|
||||
});
|
||||
l2cache->MemReqPort.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2cache->MemRspPort);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2cache->CoreReqPorts.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2cache->CoreRspPorts.at(j);
|
||||
}
|
||||
} else {
|
||||
auto& l2_mem_switch = l2_mem_switches_.at(i);
|
||||
l2_mem_switch = Switch<MemReq, MemRsp>::Create("l2_arb", ArbiterType::RoundRobin, cores_per_cluster);
|
||||
l2_mem_switch->ReqOut.bind(mem_req_ports.at(i));
|
||||
mem_rsp_ports.at(i)->bind(&l2_mem_switch->RspIn);
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
cluster_mem_req_ports.at(j) = &l2_mem_switch->ReqIn.at(j);
|
||||
cluster_mem_rsp_ports.at(j) = &l2_mem_switch->RspOut.at(j);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < cores_per_cluster; ++j) {
|
||||
auto& core = cores_.at((i * cores_per_cluster) + j);
|
||||
core->MemReqPort.bind(cluster_mem_req_ports.at(j));
|
||||
cluster_mem_rsp_ports.at(j)->bind(&core->MemRspPort);
|
||||
}
|
||||
}
|
||||
// create clusters
|
||||
for (uint32_t i = 0; i < arch.num_clusters(); ++i) {
|
||||
clusters_.at(i) = Cluster::Create(i, this, arch, dcrs_);
|
||||
// connect L3 core ports
|
||||
clusters_.at(i)->mem_req_port.bind(&l3cache_->CoreReqPorts.at(i));
|
||||
l3cache_->CoreRspPorts.at(i).bind(&clusters_.at(i)->mem_rsp_port);
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
SimPlatform::instance().finalize();
|
||||
}
|
||||
// set up memory perf recording
|
||||
memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
perf_mem_reads_ += !req.write;
|
||||
perf_mem_writes_ += req.write;
|
||||
perf_mem_pending_reads_ += !req.write;
|
||||
});
|
||||
memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){
|
||||
__unused (cycle);
|
||||
--perf_mem_pending_reads_;
|
||||
});
|
||||
|
||||
void attach_ram(RAM* ram) {
|
||||
for (auto core : cores_) {
|
||||
core->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
this->reset();
|
||||
}
|
||||
|
||||
int run() {
|
||||
SimPlatform::instance().reset();
|
||||
bool running;
|
||||
int exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().tick();
|
||||
running = false;
|
||||
for (auto& core : cores_) {
|
||||
if (core->running()) {
|
||||
running = true;
|
||||
}
|
||||
if (core->check_exit()) {
|
||||
exitcode = core->getIRegValue(3);
|
||||
running = false;
|
||||
break;
|
||||
// Finalizes the simulation platform singleton when the processor goes away.
ProcessorImpl::~ProcessorImpl() {
  auto& platform = SimPlatform::instance();
  platform.finalize();
}
|
||||
|
||||
// Hands the given RAM to every cluster, in cluster order.
void ProcessorImpl::attach_ram(RAM* ram) {
  for (const auto& cluster_ptr : clusters_) {
    cluster_ptr->attach_ram(ram);
  }
}
|
||||
|
||||
int ProcessorImpl::run(bool riscv_test) {
|
||||
SimPlatform::instance().reset();
|
||||
this->reset();
|
||||
|
||||
bool done;
|
||||
Word exitcode = 0;
|
||||
do {
|
||||
SimPlatform::instance().tick();
|
||||
done = true;
|
||||
for (auto cluster : clusters_) {
|
||||
if (cluster->running()) {
|
||||
Word ec;
|
||||
if (cluster->check_exit(&ec, riscv_test)) {
|
||||
exitcode |= ec;
|
||||
} else {
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (running);
|
||||
}
|
||||
perf_mem_latency_ += perf_mem_pending_reads_;
|
||||
} while (!done);
|
||||
|
||||
return exitcode;
|
||||
}
|
||||
};
|
||||
return exitcode;
|
||||
}
|
||||
|
||||
// Clears all memory performance counters.
void ProcessorImpl::reset() {
  perf_mem_pending_reads_ = 0;
  perf_mem_latency_       = 0;
  perf_mem_writes_        = 0;
  perf_mem_reads_         = 0;
}
|
||||
|
||||
void ProcessorImpl::write_dcr(uint32_t addr, uint32_t value) {
|
||||
dcrs_.write(addr, value);
|
||||
}
|
||||
|
||||
// Builds a snapshot of the processor-level performance counters: the memory
// read/write/latency counters, the L3 cache statistics, and the sum of the
// statistics reported by every cluster.
ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
  ProcessorImpl::PerfStats snapshot;

  snapshot.mem_reads   = perf_mem_reads_;
  snapshot.mem_writes  = perf_mem_writes_;
  snapshot.mem_latency = perf_mem_latency_;
  snapshot.l3cache     = l3cache_->perf_stats();

  // Accumulate per-cluster statistics.
  for (const auto& cluster_ptr : clusters_) {
    snapshot.clusters += cluster_ptr->perf_stats();
  }

  return snapshot;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Processor::Processor(const ArchDef& arch)
|
||||
: impl_(new Impl(arch))
|
||||
Processor::Processor(const Arch& arch)
|
||||
: impl_(new ProcessorImpl(arch))
|
||||
{}
|
||||
|
||||
Processor::~Processor() {
|
||||
@@ -173,6 +146,10 @@ void Processor::attach_ram(RAM* mem) {
|
||||
impl_->attach_ram(mem);
|
||||
}
|
||||
|
||||
int Processor::run() {
|
||||
return impl_->run();
|
||||
int Processor::run(bool riscv_test) {
|
||||
return impl_->run(riscv_test);
|
||||
}
|
||||
|
||||
void Processor::write_dcr(uint32_t addr, uint32_t value) {
|
||||
return impl_->write_dcr(addr, value);
|
||||
}
|
||||
@@ -1,22 +1,39 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Arch;
|
||||
class RAM;
|
||||
class ProcessorImpl;
|
||||
|
||||
class Processor {
|
||||
public:
|
||||
Processor(const ArchDef& arch);
|
||||
Processor(const Arch& arch);
|
||||
~Processor();
|
||||
|
||||
void attach_ram(RAM* mem);
|
||||
|
||||
int run();
|
||||
int run(bool riscv_test);
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
Impl* impl_;
|
||||
ProcessorImpl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
66
sim/simx/processor_impl.h
Normal file
66
sim/simx/processor_impl.h
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mem_sim.h"
|
||||
#include "cache_sim.h"
|
||||
#include "constants.h"
|
||||
#include "dcrs.h"
|
||||
#include "cluster.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class ProcessorImpl {
|
||||
public:
|
||||
struct PerfStats {
|
||||
uint64_t mem_reads;
|
||||
uint64_t mem_writes;
|
||||
uint64_t mem_latency;
|
||||
CacheSim::PerfStats l3cache;
|
||||
Cluster::PerfStats clusters;
|
||||
|
||||
PerfStats()
|
||||
: mem_reads(0)
|
||||
, mem_writes(0)
|
||||
, mem_latency(0)
|
||||
{}
|
||||
};
|
||||
|
||||
ProcessorImpl(const Arch& arch);
|
||||
~ProcessorImpl();
|
||||
|
||||
void attach_ram(RAM* mem);
|
||||
|
||||
int run(bool riscv_test);
|
||||
|
||||
void write_dcr(uint32_t addr, uint32_t value);
|
||||
|
||||
ProcessorImpl::PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
|
||||
void reset();
|
||||
|
||||
const Arch& arch_;
|
||||
std::vector<std::shared_ptr<Cluster>> clusters_;
|
||||
DCRS dcrs_;
|
||||
MemSim::Ptr memsim_;
|
||||
CacheSim::Ptr l3cache_;
|
||||
uint64_t perf_mem_reads_;
|
||||
uint64_t perf_mem_writes_;
|
||||
uint64_t perf_mem_latency_;
|
||||
uint64_t perf_mem_pending_reads_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
@@ -6,20 +19,15 @@
|
||||
namespace vortex {
|
||||
|
||||
class Scoreboard {
|
||||
private:
|
||||
public:
|
||||
|
||||
struct reg_use_t {
|
||||
RegType type;
|
||||
uint32_t reg;
|
||||
uint64_t owner;
|
||||
};
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::vector<RegMask> in_use_vregs_;
|
||||
std::unordered_map<uint32_t, uint64_t> owners_;
|
||||
|
||||
public:
|
||||
Scoreboard(const ArchDef &arch)
|
||||
|
||||
Scoreboard(const Arch &arch)
|
||||
: in_use_iregs_(arch.num_warps())
|
||||
, in_use_fregs_(arch.num_warps())
|
||||
, in_use_vregs_(arch.num_warps())
|
||||
@@ -84,8 +92,7 @@ public:
|
||||
}
|
||||
|
||||
void reserve(pipeline_trace_t* state) {
|
||||
if (!state->wb)
|
||||
return;
|
||||
assert(state->wb);
|
||||
switch (state->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(state->wid).set(state->rdest);
|
||||
@@ -105,8 +112,7 @@ public:
|
||||
}
|
||||
|
||||
void release(pipeline_trace_t* state) {
|
||||
if (!state->wb)
|
||||
return;
|
||||
assert(state->wb);
|
||||
switch (state->rdest_type) {
|
||||
case RegType::Integer:
|
||||
in_use_iregs_.at(state->wid).reset(state->rdest);
|
||||
@@ -123,6 +129,13 @@ public:
|
||||
uint32_t tag = (state->rdest << 16) | (state->wid << 4) | (int)state->rdest_type;
|
||||
owners_.erase(tag);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<RegMask> in_use_iregs_;
|
||||
std::vector<RegMask> in_use_fregs_;
|
||||
std::vector<RegMask> in_use_vregs_;
|
||||
std::unordered_map<uint32_t, uint64_t> owners_;
|
||||
};
|
||||
|
||||
}
|
||||
138
sim/simx/shared_mem.cpp
Normal file
138
sim/simx/shared_mem.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "shared_mem.h"
|
||||
#include "core.h"
|
||||
#include <bitmanip.h>
|
||||
#include <vector>
|
||||
#include "types.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
// Implementation of the banked shared-memory model.
//
// Data accesses (read/write) go straight to an internal RAM. Timing is
// modelled in tick(): at most one request per bank is served per tick and
// conflicting requests stay queued and are counted as bank stalls.
class SharedMem::Impl {
protected:
  SharedMem* simobject_;
  Config     config_;
  RAM        ram_;
  uint32_t   bank_sel_addr_start_;  // first address bit of the bank selector
  uint32_t   bank_sel_addr_end_;    // last address bit of the bank selector
  PerfStats  perf_stats_;

  // Maps an address to an offset inside the local RAM by keeping only the
  // low log2ceil(capacity / line_size) address bits.
  // NOTE(review): if capacity == line_size then line_bits is 0 and
  // `line_bits-1` wraps around; confirm such a configuration cannot occur.
  uint64_t to_local_addr(uint64_t addr) {
    uint32_t total_lines = config_.capacity / config_.line_size;
    uint32_t line_bits = log2ceil(total_lines);
    uint32_t offset = bit_getw(addr, 0, line_bits-1);
    return offset;
  }

public:
  Impl(SharedMem* simobject, const Config& config)
    : simobject_(simobject)
    , config_(config)
    , ram_(config.capacity, config.capacity)
    , bank_sel_addr_start_(0)
    // With a single bank there are no selector bits; avoid the unsigned
    // wrap-around of `log2ceil(1) - 1`.
    , bank_sel_addr_end_((config.num_banks > 1) ? (log2ceil(config.num_banks) - 1) : 0)
  {}

  virtual ~Impl() {}

  // Clears the performance counters.
  void reset() {
    perf_stats_ = PerfStats();
  }

  // Reads `size` bytes at `addr` (translated to a local offset) into `data`.
  void read(void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Shared Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.read(data, s_addr, size);
  }

  // Writes `size` bytes from `data` at `addr` (translated to a local offset).
  void write(const void* data, uint64_t addr, uint32_t size) {
    auto s_addr = to_local_addr(addr);
    DPH(3, "Shared Mem addr=0x" << std::hex << s_addr << std::endl);
    ram_.write(data, s_addr, size);
  }

  // Serves the request at the head of every input port, one per bank at most.
  void tick() {
    std::vector<bool> in_used_banks(config_.num_banks);
    for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
      auto& core_req_port = simobject_->Inputs.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      // Only extract selector bits when there is more than one bank. The
      // previous `start <= end` comparison could not detect the single-bank
      // case because both fields are unsigned.
      uint32_t bank_id = 0;
      if (config_.num_banks > 1) {
        bank_id = (uint32_t)bit_getw(core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
      }

      // bank conflict check: the request stays queued for a later tick
      if (in_used_banks.at(bank_id)) {
        ++perf_stats_.bank_stalls;
        continue;
      }

      in_used_banks.at(bank_id) = true;

      if (!core_req.write || config_.write_reponse) {
        // send response
        MemRsp core_rsp{core_req.tag, core_req.cid};
        simobject_->Outputs.at(req_id).send(core_rsp, 1);
      }

      // update perf counters
      perf_stats_.reads += !core_req.write;
      perf_stats_.writes += core_req.write;

      // remove input
      core_req_port.pop();
    }
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Creates one input and one output port per requester (config.num_reqs) and
// allocates the implementation object.
SharedMem::SharedMem(const SimContext& ctx, const char* name, const Config& config)
  : SimObject<SharedMem>(ctx, name),
    Inputs(config.num_reqs, this),
    Outputs(config.num_reqs, this),
    impl_(new Impl(this, config)) {
}
|
||||
|
||||
// Releases the implementation object allocated by the constructor.
SharedMem::~SharedMem() {
  delete this->impl_;
}
|
||||
|
||||
void SharedMem::reset() {
|
||||
impl_->reset();
|
||||
}
|
||||
|
||||
void SharedMem::read(void* data, uint64_t addr, uint32_t size) {
|
||||
impl_->read(data, addr, size);
|
||||
}
|
||||
|
||||
void SharedMem::write(const void* data, uint64_t addr, uint32_t size) {
|
||||
impl_->write(data, addr, size);
|
||||
}
|
||||
|
||||
void SharedMem::tick() {
|
||||
impl_->tick();
|
||||
}
|
||||
|
||||
// Returns a reference to the counters kept by the implementation object.
const SharedMem::PerfStats& SharedMem::perf_stats() const {
  const Impl& impl = *impl_;
  return impl.perf_stats();
}
|
||||
72
sim/simx/shared_mem.h
Normal file
72
sim/simx/shared_mem.h
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Banked shared-memory simulation object. Requests arrive on `Inputs`,
// responses leave on `Outputs`; the behaviour lives in the private Impl.
class SharedMem : public SimObject<SharedMem> {
public:
  // Construction parameters.
  struct Config {
    uint32_t capacity;
    uint32_t line_size;
    uint32_t num_reqs;
    uint32_t num_banks;
    bool     write_reponse;
  };

  // Performance counters.
  struct PerfStats {
    uint64_t reads       = 0;
    uint64_t writes      = 0;
    uint64_t bank_stalls = 0;

    PerfStats() {}

    // Adds the counters of `rhs` field by field.
    PerfStats& operator+=(const PerfStats& rhs) {
      reads       += rhs.reads;
      writes      += rhs.writes;
      bank_stalls += rhs.bank_stalls;
      return *this;
    }
  };

  std::vector<SimPort<MemReq>> Inputs;
  std::vector<SimPort<MemRsp>> Outputs;

  SharedMem(const SimContext& ctx, const char* name, const Config& config);
  virtual ~SharedMem();

  // Clears the performance counters.
  void reset();

  // Reads `size` bytes at `addr` into `data`.
  void read(void* data, uint64_t addr, uint32_t size);

  // Writes `size` bytes from `data` at `addr`.
  void write(const void* data, uint64_t addr, uint32_t size);

  // Advances the model by one simulation tick.
  void tick();

  // Returns the current performance counters.
  const PerfStats& perf_stats() const;

protected:

  class Impl;
  Impl* impl_;
};
|
||||
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <simobject.h>
|
||||
#include <bitmanip.h>
|
||||
#include <vector>
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
|
||||
// Banked shared-memory timing model. Each tick serves the request at the
// head of every input port, allowing at most one request per bank; requests
// that hit an already-used bank stay queued and are counted as bank stalls.
class SharedMem : public SimObject<SharedMem> {
public:
  // Construction parameters.
  struct Config {
    uint32_t num_reqs;
    uint32_t num_banks;
    uint32_t bank_offset;
    uint32_t latency;
    bool write_reponse;
  };

  // Performance counters.
  struct PerfStats {
    uint64_t reads;
    uint64_t writes;
    uint64_t bank_stalls;

    PerfStats()
      : reads(0)
      , writes(0)
      , bank_stalls(0)
    {}
  };

  std::vector<SimPort<MemReq>> Inputs;
  std::vector<SimPort<MemRsp>> Outputs;

  SharedMem(const SimContext& ctx, const char* name, const Config& config)
    : SimObject<SharedMem>(ctx, name)
    , Inputs(config.num_reqs, this)
    , Outputs(config.num_reqs, this)
    , config_(config)
    , bank_sel_addr_start_(config.bank_offset)
    , bank_sel_addr_end_(config.bank_offset + log2up(config.num_banks)-1)
  {}

  virtual ~SharedMem() {}

  // Clears the performance counters.
  void reset() {
    perf_stats_ = PerfStats();
  }

  // Serves the pending requests for one simulation tick.
  void tick() {
    std::vector<bool> in_used_banks(config_.num_banks);
    for (uint32_t req_id = 0; req_id < config_.num_reqs; ++req_id) {
      auto& core_req_port = this->Inputs.at(req_id);
      if (core_req_port.empty())
        continue;

      auto& core_req = core_req_port.front();

      // With a single bank the selector bit range is degenerate, so every
      // request maps to bank 0; only extract selector bits otherwise.
      uint32_t bank_id = 0;
      if (config_.num_banks > 1) {
        bank_id = (uint32_t)bit_getw(
          core_req.addr, bank_sel_addr_start_, bank_sel_addr_end_);
      }

      // bank conflict check: record the stall and retry on a later tick
      if (in_used_banks.at(bank_id)) {
        ++perf_stats_.bank_stalls;
        continue;
      }

      in_used_banks.at(bank_id) = true;

      if (!core_req.write || config_.write_reponse) {
        // send response
        MemRsp core_rsp{core_req.tag, core_req.core_id};
        this->Outputs.at(req_id).send(core_rsp, 1);
      }

      // update perf counters
      perf_stats_.reads += !core_req.write;
      perf_stats_.writes += core_req.write;

      // remove input
      core_req_port.pop();
    }
  }

  const PerfStats& perf_stats() const {
    return perf_stats_;
  }

protected:
  Config config_;
  uint32_t bank_sel_addr_start_;  // first address bit of the bank selector
  uint32_t bank_sel_addr_end_;    // last address bit of the bank selector
  PerfStats perf_stats_;
};
|
||||
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
#include "tex_unit.h"
|
||||
#include "core.h"
|
||||
#include <texturing.h>
|
||||
#include <VX_config.h>
|
||||
|
||||
using namespace vortex;
|
||||
using namespace cocogfx;
|
||||
|
||||
// Texture filtering modes, decoded from the TEX_STATE_FILTER state value.
enum class FilterMode {
  Point,      // single-texel lookup
  Bilinear,   // four-texel lookup blended with two weights
  Trilinear,
};
|
||||
|
||||
TexUnit::TexUnit(Core* core) : core_(core) {}
|
||||
|
||||
// Nothing to release: the core pointer is not deleted here.
TexUnit::~TexUnit() = default;
|
||||
|
||||
void TexUnit::clear() {
|
||||
for (auto& state : states_) {
|
||||
state = 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TexUnit::get_state(uint32_t state) {
|
||||
return states_.at(state);
|
||||
}
|
||||
|
||||
// Stores `value` into state register `state`; the access is bounds-checked
// through std::array::at().
void TexUnit::set_state(uint32_t state, uint32_t value) {
  uint32_t& slot = states_.at(state);
  slot = value;
}
|
||||
|
||||
uint32_t TexUnit::read(int32_t u,
|
||||
int32_t v,
|
||||
int32_t lod,
|
||||
std::vector<mem_addr_size_t>* mem_addrs) {
|
||||
//--
|
||||
auto xu = Fixed<TEX_FXD_FRAC>::make(u);
|
||||
auto xv = Fixed<TEX_FXD_FRAC>::make(v);
|
||||
uint32_t base_addr = states_.at(TEX_STATE_ADDR) + states_.at(TEX_STATE_MIPOFF(lod));
|
||||
uint32_t log_width = std::max<int32_t>(states_.at(TEX_STATE_WIDTH) - lod, 0);
|
||||
uint32_t log_height = std::max<int32_t>(states_.at(TEX_STATE_HEIGHT) - lod, 0);
|
||||
auto format = (TexFormat)states_.at(TEX_STATE_FORMAT);
|
||||
auto filter = (FilterMode)states_.at(TEX_STATE_FILTER);
|
||||
auto wrapu = (WrapMode)states_.at(TEX_STATE_WRAPU);
|
||||
auto wrapv = (WrapMode)states_.at(TEX_STATE_WRAPV);
|
||||
|
||||
auto stride = Stride(format);
|
||||
|
||||
switch (filter) {
|
||||
case FilterMode::Bilinear: {
|
||||
// addressing
|
||||
uint32_t offset00, offset01, offset10, offset11;
|
||||
uint32_t alpha, beta;
|
||||
TexAddressLinear(xu, xv, log_width, log_height, wrapu, wrapv,
|
||||
&offset00, &offset01, &offset10, &offset11, &alpha, &beta);
|
||||
|
||||
uint32_t addr00 = base_addr + offset00 * stride;
|
||||
uint32_t addr01 = base_addr + offset01 * stride;
|
||||
uint32_t addr10 = base_addr + offset10 * stride;
|
||||
uint32_t addr11 = base_addr + offset11 * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel00(0), texel01(0), texel10(0), texel11(0);
|
||||
core_->dcache_read(&texel00, addr00, stride);
|
||||
core_->dcache_read(&texel01, addr01, stride);
|
||||
core_->dcache_read(&texel10, addr10, stride);
|
||||
core_->dcache_read(&texel11, addr11, stride);
|
||||
|
||||
mem_addrs->push_back({addr00, stride});
|
||||
mem_addrs->push_back({addr01, stride});
|
||||
mem_addrs->push_back({addr10, stride});
|
||||
mem_addrs->push_back({addr11, stride});
|
||||
|
||||
// filtering
|
||||
auto color = TexFilterLinear(
|
||||
format, texel00, texel01, texel10, texel11, alpha, beta);
|
||||
return color;
|
||||
}
|
||||
case FilterMode::Point: {
|
||||
// addressing
|
||||
uint32_t offset;
|
||||
TexAddressPoint(xu, xv, log_width, log_height, wrapu, wrapv, &offset);
|
||||
|
||||
uint32_t addr = base_addr + offset * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel(0);
|
||||
core_->dcache_read(&texel, addr, stride);
|
||||
mem_addrs->push_back({addr, stride});
|
||||
|
||||
// filtering
|
||||
auto color = TexFilterPoint(format, texel);
|
||||
return color;
|
||||
}
|
||||
default:
|
||||
std::abort();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "types.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Core;
|
||||
|
||||
class TexUnit {
|
||||
public:
|
||||
TexUnit(Core* core);
|
||||
~TexUnit();
|
||||
|
||||
void clear();
|
||||
|
||||
uint32_t get_state(uint32_t state);
|
||||
|
||||
void set_state(uint32_t state, uint32_t value);
|
||||
|
||||
uint32_t read(int32_t u, int32_t v, int32_t lod, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
|
||||
private:
|
||||
|
||||
std::array<uint32_t, NUM_TEX_STATES> states_;
|
||||
Core* core_;
|
||||
};
|
||||
|
||||
}
|
||||
367
sim/simx/types.h
367
sim/simx/types.h
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
@@ -5,31 +18,42 @@
|
||||
#include <queue>
|
||||
#include <unordered_map>
|
||||
#include <util.h>
|
||||
#include <stringutil.h>
|
||||
#include <VX_config.h>
|
||||
#include <simobject.h>
|
||||
#include "uuid_gen.h"
|
||||
#include "debug.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
typedef uint8_t Byte;
|
||||
#if XLEN == 32
|
||||
#if (XLEN == 32)
|
||||
typedef uint32_t Word;
|
||||
typedef int32_t WordI;
|
||||
typedef uint64_t DWord;
|
||||
typedef int64_t DWordI;
|
||||
#elif XLEN == 64
|
||||
typedef uint32_t WordF;
|
||||
#elif (XLEN == 64)
|
||||
typedef uint64_t Word;
|
||||
typedef int64_t WordI;
|
||||
typedef __uint128_t DWord;
|
||||
typedef __int128_t DWordI;
|
||||
typedef uint64_t WordF;
|
||||
#else
|
||||
#error unsupported XLEN
|
||||
#endif
|
||||
|
||||
typedef uint64_t FWord;
|
||||
#define MAX_NUM_CORES 1024
|
||||
#define MAX_NUM_THREADS 32
|
||||
#define MAX_NUM_WARPS 32
|
||||
#define MAX_NUM_REGS 32
|
||||
|
||||
typedef std::bitset<32> RegMask;
|
||||
typedef std::bitset<32> ThreadMask;
|
||||
typedef std::bitset<32> WarpMask;
|
||||
typedef std::bitset<MAX_NUM_CORES> CoreMask;
|
||||
typedef std::bitset<MAX_NUM_REGS> RegMask;
|
||||
typedef std::bitset<MAX_NUM_THREADS> ThreadMask;
|
||||
typedef std::bitset<MAX_NUM_WARPS> WarpMask;
|
||||
|
||||
typedef std::unordered_map<uint32_t, uint32_t> CSRs;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -40,8 +64,8 @@ enum class RegType {
|
||||
Vector
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
|
||||
switch (clss) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
||||
switch (type) {
|
||||
case RegType::None: break;
|
||||
case RegType::Integer: os << "x"; break;
|
||||
case RegType::Float: os << "f"; break;
|
||||
@@ -53,23 +77,19 @@ inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class ExeType {
|
||||
NOP,
|
||||
ALU,
|
||||
LSU,
|
||||
CSR,
|
||||
FPU,
|
||||
GPU,
|
||||
SFU,
|
||||
MAX,
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const ExeType& type) {
|
||||
switch (type) {
|
||||
case ExeType::NOP: os << "NOP"; break;
|
||||
case ExeType::ALU: os << "ALU"; break;
|
||||
case ExeType::LSU: os << "LSU"; break;
|
||||
case ExeType::CSR: os << "CSR"; break;
|
||||
case ExeType::FPU: os << "FPU"; break;
|
||||
case ExeType::GPU: os << "GPU"; break;
|
||||
case ExeType::SFU: os << "SFU"; break;
|
||||
case ExeType::MAX: break;
|
||||
}
|
||||
return os;
|
||||
@@ -82,8 +102,7 @@ enum class AluType {
|
||||
BRANCH,
|
||||
SYSCALL,
|
||||
IMUL,
|
||||
IDIV,
|
||||
CMOV,
|
||||
IDIV
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
@@ -93,7 +112,6 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
case AluType::SYSCALL: os << "SYSCALL"; break;
|
||||
case AluType::IMUL: os << "IMUL"; break;
|
||||
case AluType::IDIV: os << "IDIV"; break;
|
||||
case AluType::CMOV: os << "CMOV"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -103,16 +121,14 @@ inline std::ostream &operator<<(std::ostream &os, const AluType& type) {
|
||||
enum class LsuType {
|
||||
LOAD,
|
||||
STORE,
|
||||
FENCE,
|
||||
PREFETCH,
|
||||
FENCE
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const LsuType& type) {
|
||||
switch (type) {
|
||||
case LsuType::LOAD: os << "LOAD"; break;
|
||||
case LsuType::STORE: os << "STORE"; break;
|
||||
case LsuType::FENCE: os << "FENCE"; break;
|
||||
case LsuType::PREFETCH: os << "PREFETCH"; break;
|
||||
case LsuType::LOAD: os << "LOAD"; break;
|
||||
case LsuType::STORE: os << "STORE"; break;
|
||||
case LsuType::FENCE: os << "FENCE"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -141,21 +157,6 @@ struct mem_addr_size_t {
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
// Classifies a memory access by the address range that `addr` falls in.
//   - AddrType::Shared when SM_ENABLE is set and addr lies in
//     [SMEM_BASE_ADDR - SMEM_SIZE, SMEM_BASE_ADDR)
//   - AddrType::IO     when addr >= IO_BASE_ADDR
//   - AddrType::Global otherwise
// `size` is only consulted by the assert below.
inline AddrType get_addr_type(Word addr, uint32_t size) {
|
||||
// presumably silences the unused-parameter warning when assert() is compiled out — confirm against the __unused macro
__unused (size);
|
||||
if (SM_ENABLE) {
|
||||
// shared-memory window: the SMEM_SIZE bytes that end at SMEM_BASE_ADDR
if (addr >= (SMEM_BASE_ADDR - SMEM_SIZE)
|
||||
&& addr < SMEM_BASE_ADDR) {
|
||||
// the whole access must stay inside the shared-memory window
assert((addr + size) <= SMEM_BASE_ADDR);
|
||||
return AddrType::Shared;
|
||||
}
|
||||
}
|
||||
// this check runs after the shared-memory test, so a match there takes precedence
if (addr >= IO_BASE_ADDR) {
|
||||
return AddrType::IO;
|
||||
}
|
||||
return AddrType::Global;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class FpuType {
|
||||
@@ -179,23 +180,37 @@ inline std::ostream &operator<<(std::ostream &os, const FpuType& type) {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
enum class GpuType {
|
||||
enum class SfuType {
|
||||
TMC,
|
||||
WSPAWN,
|
||||
SPLIT,
|
||||
JOIN,
|
||||
BAR,
|
||||
PRED,
|
||||
CSRRW,
|
||||
CSRRS,
|
||||
CSRRC,
|
||||
TEX,
|
||||
RASTER,
|
||||
ROP,
|
||||
CMOV
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const GpuType& type) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const SfuType& type) {
|
||||
switch (type) {
|
||||
case GpuType::TMC: os << "TMC"; break;
|
||||
case GpuType::WSPAWN: os << "WSPAWN"; break;
|
||||
case GpuType::SPLIT: os << "SPLIT"; break;
|
||||
case GpuType::JOIN: os << "JOIN"; break;
|
||||
case GpuType::BAR: os << "BAR"; break;
|
||||
case GpuType::TEX: os << "TEX"; break;
|
||||
case SfuType::TMC: os << "TMC"; break;
|
||||
case SfuType::WSPAWN: os << "WSPAWN"; break;
|
||||
case SfuType::SPLIT: os << "SPLIT"; break;
|
||||
case SfuType::JOIN: os << "JOIN"; break;
|
||||
case SfuType::BAR: os << "BAR"; break;
|
||||
case SfuType::PRED: os << "PRED"; break;
|
||||
case SfuType::CSRRW: os << "CSRRW"; break;
|
||||
case SfuType::CSRRS: os << "CSRRS"; break;
|
||||
case SfuType::CSRRC: os << "CSRRC"; break;
|
||||
case SfuType::TEX: os << "TEX"; break;
|
||||
case SfuType::RASTER: os << "RASTER"; break;
|
||||
case SfuType::ROP: os << "ROP"; break;
|
||||
case SfuType::CMOV: os << "CMOV"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -218,31 +233,32 @@ inline std::ostream &operator<<(std::ostream &os, const ArbiterType& type) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct MemReq {
|
||||
uint64_t addr;
|
||||
bool write;
|
||||
bool non_cacheable;
|
||||
uint32_t tag;
|
||||
uint32_t core_id;
|
||||
uint64_t uuid;
|
||||
uint64_t addr;
|
||||
bool write;
|
||||
AddrType type;
|
||||
uint32_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
MemReq(uint64_t _addr = 0,
|
||||
bool _write = false,
|
||||
bool _non_cacheable = false,
|
||||
uint64_t _tag = 0,
|
||||
uint32_t _core_id = 0,
|
||||
uint64_t _uuid = 0
|
||||
) : addr(_addr)
|
||||
, write(_write)
|
||||
, non_cacheable(_non_cacheable)
|
||||
, tag(_tag)
|
||||
, core_id(_core_id)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
MemReq(uint64_t _addr = 0,
|
||||
bool _write = false,
|
||||
AddrType _type = AddrType::Global,
|
||||
uint64_t _tag = 0,
|
||||
uint32_t _cid = 0,
|
||||
uint64_t _uuid = 0
|
||||
) : addr(_addr)
|
||||
, write(_write)
|
||||
, type(_type)
|
||||
, tag(_tag)
|
||||
, cid(_cid)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
|
||||
os << "mem-" << (req.write ? "wr" : "rd") << ": ";
|
||||
os << "addr=" << std::hex << req.addr << std::dec << ", tag=" << req.tag << ", core_id=" << req.core_id;
|
||||
os << "addr=0x" << std::hex << req.addr << ", type=" << req.type;
|
||||
os << std::dec << ", tag=" << req.tag << ", cid=" << req.cid;
|
||||
os << " (#" << std::dec << req.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
@@ -250,18 +266,19 @@ inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct MemRsp {
|
||||
uint64_t tag;
|
||||
uint32_t core_id;
|
||||
uint64_t uuid;
|
||||
MemRsp(uint64_t _tag = 0, uint32_t _core_id = 0, uint64_t _uuid = 0)
|
||||
: tag (_tag)
|
||||
, core_id(_core_id)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
uint64_t tag;
|
||||
uint32_t cid;
|
||||
uint64_t uuid;
|
||||
|
||||
MemRsp(uint64_t _tag = 0, uint32_t _cid = 0, uint64_t _uuid = 0)
|
||||
: tag (_tag)
|
||||
, cid(_cid)
|
||||
, uuid(_uuid)
|
||||
{}
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
||||
os << "mem-rsp: tag=" << rsp.tag << ", core_id=" << rsp.core_id;
|
||||
os << "mem-rsp: tag=" << rsp.tag << ", cid=" << rsp.cid;
|
||||
os << " (#" << std::dec << rsp.uuid << ")";
|
||||
return os;
|
||||
}
|
||||
@@ -270,10 +287,6 @@ inline std::ostream &operator<<(std::ostream &os, const MemRsp& rsp) {
|
||||
|
||||
template <typename T>
|
||||
class HashTable {
|
||||
private:
|
||||
std::vector<std::pair<bool, T>> entries_;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
HashTable(uint32_t capacity)
|
||||
: entries_(capacity)
|
||||
@@ -336,92 +349,180 @@ public:
|
||||
}
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<bool, T>> entries_;
|
||||
uint32_t size_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Req, typename Rsp, uint32_t MaxInputs = 32>
|
||||
template <typename Req, typename Rsp>
|
||||
class Switch : public SimObject<Switch<Req, Rsp>> {
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
uint32_t cursor_;
|
||||
uint32_t tag_shift_;
|
||||
|
||||
public:
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
std::vector<SimPort<Rsp>> RspIn;
|
||||
|
||||
std::vector<SimPort<Req>> ReqOut;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
|
||||
Switch(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
ArbiterType type,
|
||||
uint32_t num_inputs,
|
||||
uint32_t num_inputs = 1,
|
||||
uint32_t num_outputs = 1,
|
||||
uint32_t delay = 1
|
||||
)
|
||||
: SimObject<Switch<Req, Rsp, MaxInputs>>(ctx, name)
|
||||
: SimObject<Switch<Req, Rsp>>(ctx, name)
|
||||
, ReqIn(num_inputs, this)
|
||||
, RspIn(num_inputs, this)
|
||||
, ReqOut(num_outputs, this)
|
||||
, RspOut(num_outputs, this)
|
||||
, type_(type)
|
||||
, delay_(delay)
|
||||
, cursor_(0)
|
||||
, tag_shift_(log2ceil(num_inputs))
|
||||
, ReqIn(num_inputs, this)
|
||||
, ReqOut(this)
|
||||
, RspIn(this)
|
||||
, RspOut(num_inputs, this)
|
||||
, cursors_(num_outputs, 0)
|
||||
, lg_num_reqs_(log2ceil(num_inputs / num_outputs))
|
||||
{
|
||||
assert(delay_ != 0);
|
||||
assert(num_inputs <= MaxInputs);
|
||||
if (num_inputs == 1) {
|
||||
// bypass
|
||||
ReqIn.at(0).bind(&ReqOut);
|
||||
RspIn.bind(&RspOut.at(0));
|
||||
assert(delay != 0);
|
||||
assert(num_inputs <= 32);
|
||||
assert(num_outputs <= 32);
|
||||
assert(num_inputs >= num_outputs);
|
||||
|
||||
if (num_inputs == num_outputs) {
|
||||
// bypass mode
|
||||
for (uint32_t i = 0; i < num_inputs; ++i) {
|
||||
ReqIn.at(i).bind(&ReqOut.at(i));
|
||||
RspOut.at(i).bind(&RspIn.at(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
cursor_ = 0;
|
||||
for (auto& cursor : cursors_) {
|
||||
cursor = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tick() {
|
||||
if (ReqIn.size() == 1)
|
||||
void tick() {
|
||||
uint32_t I = ReqIn.size();
|
||||
uint32_t O = ReqOut.size();
|
||||
uint32_t R = 1 << lg_num_reqs_;
|
||||
|
||||
// skip bypass mode
|
||||
if (I == O)
|
||||
return;
|
||||
|
||||
// process incoming requests
|
||||
for (uint32_t i = 0, n = ReqIn.size(); i < n; ++i) {
|
||||
uint32_t j = (cursor_ + i) % n;
|
||||
auto& req_in = ReqIn.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (tag_shift_) {
|
||||
req.tag = (req.tag << tag_shift_) | j;
|
||||
// process incoming requests
|
||||
for (uint32_t o = 0; o < O; ++o) {
|
||||
for (uint32_t r = 0; r < R; ++r) {
|
||||
uint32_t i = (cursors_.at(o) + r) & (R-1);
|
||||
uint32_t j = o * R + i;
|
||||
if (j >= I)
|
||||
continue;
|
||||
|
||||
auto& req_in = ReqIn.at(j);
|
||||
if (!req_in.empty()) {
|
||||
auto& req = req_in.front();
|
||||
if (lg_num_reqs_ != 0) {
|
||||
req.tag = (req.tag << lg_num_reqs_) | i;
|
||||
}
|
||||
DT(4, this->name() << "-" << req);
|
||||
ReqOut.at(o).send(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(o, i);
|
||||
break;
|
||||
}
|
||||
ReqOut.send(req, delay_);
|
||||
req_in.pop();
|
||||
this->update_cursor(j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// process incoming responses
|
||||
if (!RspIn.empty()) {
|
||||
auto& rsp = RspIn.front();
|
||||
uint32_t port_id = 0;
|
||||
if (tag_shift_) {
|
||||
port_id = rsp.tag & ((1 << tag_shift_)-1);
|
||||
rsp.tag >>= tag_shift_;
|
||||
}
|
||||
RspOut.at(port_id).send(rsp, 1);
|
||||
RspIn.pop();
|
||||
|
||||
// process incoming responses
|
||||
if (!RspOut.at(o).empty()) {
|
||||
auto& rsp = RspOut.at(o).front();
|
||||
uint32_t i = 0;
|
||||
if (lg_num_reqs_ != 0) {
|
||||
i = rsp.tag & (R-1);
|
||||
rsp.tag >>= lg_num_reqs_;
|
||||
}
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
uint32_t j = o * R + i;
|
||||
RspIn.at(j).send(rsp, 1);
|
||||
RspOut.at(o).pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void update_cursor(uint32_t grant) {
|
||||
void update_cursor(uint32_t index, uint32_t grant) {
|
||||
if (type_ == ArbiterType::RoundRobin) {
|
||||
cursor_ = grant + 1;
|
||||
cursors_.at(index) = grant + 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<SimPort<Req>> ReqIn;
|
||||
SimPort<Req> ReqOut;
|
||||
SimPort<Rsp> RspIn;
|
||||
std::vector<SimPort<Rsp>> RspOut;
|
||||
private:
|
||||
ArbiterType type_;
|
||||
uint32_t delay_;
|
||||
std::vector<uint32_t> cursors_;
|
||||
uint32_t lg_num_reqs_;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Simulation object that splits one memory request stream in two based on
// the request's address type, and merges the two response streams back.
//   - Requests arriving on ReqIn go to ReqSm when req.type is
//     AddrType::Shared, and to ReqDc otherwise.
//   - Responses arriving on RspSm and RspDc are both forwarded to RspIn.
// The "Sm"/"Dc" suffixes presumably stand for shared memory and data cache —
// confirm against the code that binds these ports.
class SMemDemux : public SimObject<SMemDemux> {
|
||||
public:
|
||||
// upstream side: requests are read from ReqIn, responses are sent on RspIn
SimPort<MemReq> ReqIn;
|
||||
SimPort<MemRsp> RspIn;
|
||||
|
||||
// downstream side used for AddrType::Shared requests
SimPort<MemReq> ReqSm;
|
||||
SimPort<MemRsp> RspSm;
|
||||
|
||||
// downstream side used for every other request type
SimPort<MemReq> ReqDc;
|
||||
SimPort<MemRsp> RspDc;
|
||||
|
||||
// ctx and name are passed through to the SimObject base class.
// delay is stored and used as the second argument of send() when
// forwarding requests (default 1).
SMemDemux(
|
||||
const SimContext& ctx,
|
||||
const char* name,
|
||||
uint32_t delay = 1
|
||||
) : SimObject<SMemDemux>(ctx, name)
|
||||
, ReqIn(this)
|
||||
, RspIn(this)
|
||||
, ReqSm(this)
|
||||
, RspSm(this)
|
||||
, ReqDc(this)
|
||||
, RspDc(this)
|
||||
, delay_(delay)
|
||||
{}
|
||||
|
||||
// nothing to reset: the only member besides the ports is the fixed delay_
void reset() {}
|
||||
|
||||
// Forwards at most one pending request, and at most one pending response
// from each of RspSm and RspDc, per call.
void tick() {
|
||||
// process incoming requests
|
||||
if (!ReqIn.empty()) {
|
||||
auto& req = ReqIn.front();
|
||||
DT(4, this->name() << "-" << req);
|
||||
// route on the address type carried by the request
if (req.type == AddrType::Shared) {
|
||||
ReqSm.send(req, delay_);
|
||||
} else {
|
||||
ReqDc.send(req, delay_);
|
||||
}
|
||||
ReqIn.pop();
|
||||
}
|
||||
|
||||
// process incoming responses
|
||||
// responses are forwarded with a fixed value of 1, not delay_
if (!RspSm.empty()) {
|
||||
auto& rsp = RspSm.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspSm.pop();
|
||||
}
|
||||
// checked independently of RspSm, so both may forward in the same tick
if (!RspDc.empty()) {
|
||||
auto& rsp = RspDc.front();
|
||||
DT(4, this->name() << "-" << rsp);
|
||||
RspIn.send(rsp, 1);
|
||||
RspDc.pop();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// second argument passed to send() for forwarded requests
// (presumably a latency in simulation cycles — confirm against SimPort)
uint32_t delay_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
@@ -10,21 +23,25 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Warp::Warp(Core *core, uint32_t id)
|
||||
: id_(id)
|
||||
Warp::Warp(Core *core, uint32_t warp_id)
|
||||
: warp_id_(warp_id)
|
||||
, arch_(core->arch())
|
||||
, core_(core)
|
||||
, ireg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<FWord>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<uint64_t>(core->arch().num_regs()))
|
||||
, vreg_file_(core->arch().num_threads(), std::vector<Byte>(core->arch().vsize()))
|
||||
{
|
||||
this->clear();
|
||||
this->reset();
|
||||
}
|
||||
|
||||
void Warp::clear() {
|
||||
active_ = false;
|
||||
PC_ = STARTUP_ADDR;
|
||||
void Warp::reset() {
|
||||
PC_ = core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR0);
|
||||
#if (XLEN == 64)
|
||||
PC_ = (uint64_t(core_->dcrs().base_dcrs.read(VX_DCR_BASE_STARTUP_ADDR1)) << 32) | PC_;
|
||||
#endif
|
||||
tmask_.reset();
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i) {
|
||||
issued_instrs_ = 0;
|
||||
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i) {
|
||||
for (auto& reg : ireg_file_.at(i)) {
|
||||
reg = 0;
|
||||
}
|
||||
@@ -35,31 +52,44 @@ void Warp::clear() {
|
||||
reg = 0;
|
||||
}
|
||||
}
|
||||
uui_gen_.reset();
|
||||
}
|
||||
|
||||
void Warp::eval(pipeline_trace_t *trace) {
|
||||
pipeline_trace_t* Warp::eval() {
|
||||
assert(tmask_.any());
|
||||
|
||||
DPH(2, "Fetch: coreid=" << core_->id() << ", wid=" << id_ << ", tmask=");
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i)
|
||||
DPN(2, tmask_.test(n-i-1));
|
||||
DPN(2, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << trace->uuid << ")" << std::endl);
|
||||
|
||||
/* Fetch and decode. */
|
||||
#ifndef NDEBUG
|
||||
uint32_t instr_uuid = uui_gen_.get_uuid(PC_);
|
||||
uint32_t g_wid = core_->id() * arch_.num_warps() + warp_id_;
|
||||
uint32_t instr_id = instr_uuid & 0xffff;
|
||||
uint32_t instr_ref = instr_uuid >> 16;
|
||||
uint64_t uuid = (uint64_t(instr_ref) << 32) | (g_wid << 16) | instr_id;
|
||||
#else
|
||||
uint64_t uuid = 0;
|
||||
#endif
|
||||
|
||||
DPH(1, "Fetch: cid=" << core_->id() << ", wid=" << warp_id_ << ", tmask=");
|
||||
for (uint32_t i = 0, n = arch_.num_threads(); i < n; ++i)
|
||||
DPN(1, tmask_.test(i));
|
||||
DPN(1, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << uuid << ")" << std::endl);
|
||||
|
||||
// Fetch
|
||||
uint32_t instr_code = 0;
|
||||
core_->icache_read(&instr_code, PC_, sizeof(uint32_t));
|
||||
auto instr = core_->decoder().decode(instr_code);
|
||||
|
||||
// Decode
|
||||
auto instr = core_->decoder_.decode(instr_code);
|
||||
if (!instr) {
|
||||
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=" << PC_ << std::endl;
|
||||
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=0x" << PC_ << " (#" << std::dec << uuid << ")" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
|
||||
DP(2, "Instr 0x" << std::hex << instr_code << ": " << *instr);
|
||||
DP(1, "Instr 0x" << std::hex << instr_code << ": " << *instr);
|
||||
|
||||
// Update trace
|
||||
// Create trace
|
||||
auto trace = new pipeline_trace_t(uuid, arch_);
|
||||
trace->cid = core_->id();
|
||||
trace->wid = id_;
|
||||
trace->wid = warp_id_;
|
||||
trace->PC = PC_;
|
||||
trace->tmask = tmask_;
|
||||
trace->rdest = instr->getRDest();
|
||||
@@ -68,18 +98,20 @@ void Warp::eval(pipeline_trace_t *trace) {
|
||||
// Execute
|
||||
this->execute(*instr, trace);
|
||||
|
||||
DP(4, "Register state:");
|
||||
for (uint32_t i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
DP(5, "Register state:");
|
||||
for (uint32_t i = 0; i < arch_.num_regs(); ++i) {
|
||||
DPN(5, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
// Integer register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
|
||||
DPN(5, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, '|');
|
||||
DPN(5, '|');
|
||||
// Floating point register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
|
||||
DPN(5, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
DPN(5, std::endl);
|
||||
}
|
||||
|
||||
return trace;
|
||||
}
|
||||
@@ -1,3 +1,16 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __WARP_H
|
||||
#define __WARP_H
|
||||
|
||||
@@ -7,28 +20,26 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Arch;
|
||||
class Core;
|
||||
class Instr;
|
||||
class pipeline_trace_t;
|
||||
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
, PC(PC)
|
||||
, fallThrough(false)
|
||||
, unanimous(false)
|
||||
, fallthrough(false)
|
||||
{}
|
||||
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, PC(0)
|
||||
, fallThrough(true)
|
||||
, unanimous(false)
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, fallthrough(true)
|
||||
{}
|
||||
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
bool fallThrough;
|
||||
bool unanimous;
|
||||
bool fallthrough;
|
||||
};
|
||||
|
||||
struct vtype {
|
||||
@@ -40,72 +51,58 @@ struct vtype {
|
||||
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, uint32_t id);
|
||||
Warp(Core *core, uint32_t warp_id);
|
||||
|
||||
void clear();
|
||||
|
||||
bool active() const {
|
||||
return active_;
|
||||
}
|
||||
|
||||
void suspend() {
|
||||
active_ = false;
|
||||
}
|
||||
|
||||
void activate() {
|
||||
active_ = true;
|
||||
}
|
||||
|
||||
std::size_t getActiveThreads() const {
|
||||
if (active_)
|
||||
return tmask_.count();
|
||||
return 0;
|
||||
}
|
||||
void reset();
|
||||
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
return warp_id_;
|
||||
}
|
||||
|
||||
uint32_t getPC() const {
|
||||
Word getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void setPC(uint32_t PC) {
|
||||
void setPC(Word PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_.set(index, value);
|
||||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
uint32_t getTmask() const {
|
||||
if (active_)
|
||||
return tmask_.to_ulong();
|
||||
return 0;
|
||||
uint64_t getTmask() const {
|
||||
return tmask_.to_ulong();
|
||||
}
|
||||
|
||||
uint32_t getIRegValue(uint32_t reg) const {
|
||||
Word getIRegValue(uint32_t reg) const {
|
||||
return ireg_file_.at(0).at(reg);
|
||||
}
|
||||
|
||||
void eval(pipeline_trace_t *);
|
||||
uint64_t incr_instrs() {
|
||||
return issued_instrs_++;
|
||||
}
|
||||
|
||||
pipeline_trace_t* eval();
|
||||
|
||||
private:
|
||||
|
||||
void execute(const Instr &instr, pipeline_trace_t *trace);
|
||||
|
||||
UUIDGenerator uui_gen_;
|
||||
|
||||
uint32_t id_;
|
||||
uint32_t warp_id_;
|
||||
const Arch& arch_;
|
||||
Core *core_;
|
||||
bool active_;
|
||||
uint64_t issued_instrs_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> ireg_file_;
|
||||
std::vector<std::vector<FWord>> freg_file_;
|
||||
std::vector<std::vector<Byte>> vreg_file_;
|
||||
std::stack<DomStackEntry> dom_stack_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> ireg_file_;
|
||||
std::vector<std::vector<uint64_t>> freg_file_;
|
||||
std::vector<std::vector<Byte>> vreg_file_;
|
||||
std::stack<DomStackEntry> ipdom_stack_;
|
||||
|
||||
struct vtype vtype_;
|
||||
uint32_t vl_;
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
DESTDIR ?= .
|
||||
RTL_DIR = ../../hw/rtl
|
||||
DPI_DIR = ../../hw/dpi
|
||||
SCRIPT_DIR = ../../hw/scripts
|
||||
THIRD_PARTY_DIR = ../../third_party
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I.. -I../../../hw -I../../common
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I../$(THIRD_PARTY_DIR)
|
||||
|
||||
LDFLAGS += -shared ../$(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -L../$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_PIPELINE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_ICACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_DCACHE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CORE_MEM
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_BANK
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_MSHR
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_TAG
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE_DATA
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_AFU
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_SCOPE
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
|
||||
|
||||
DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += fpga.cpp opae_sim.cpp
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/afu -I$(RTL_DIR)/afu/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
||||
VL_FLAGS = --exe --cc $(TOP) --top-module $(TOP)
|
||||
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO -Wno-EOFNEWLINE
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable scope analyzer
|
||||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
# ALU backend
|
||||
VL_FLAGS += -DIMUL_DPI
|
||||
VL_FLAGS += -DIDIV_DPI
|
||||
|
||||
# FPU backend
|
||||
FPU_CORE ?= FPU_DPI
|
||||
VL_FLAGS += -D$(FPU_CORE)
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
vortex_afu.h : $(RTL_DIR)/afu/vortex_afu.vh
|
||||
$(SCRIPT_DIR)/gen_config.py -i $(RTL_DIR)/afu/vortex_afu.vh -o vortex_afu.h
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) vortex_afu.h
|
||||
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
@@ -1,30 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
namespace vortex {
|
||||
|
||||
class RAM;
|
||||
|
||||
// Public interface of the OPAE simulation shim. Only declarations are
// visible here; all state lives behind the opaque nested Impl type.
// NOTE(review): the method names look like they mirror the OPAE C API buffer
// and MMIO calls — confirm against the implementation file.
class opae_sim {
|
||||
public:
|
||||
|
||||
opae_sim();
|
||||
// virtual destructor, so the class may be deleted through a base pointer
virtual ~opae_sim();
|
||||
|
||||
// presumably sets up a buffer of `len` bytes and reports its address via
// *buf_addr and a handle via *wsid; the int return is presumably a status
// code — TODO confirm
int prepare_buffer(uint64_t len, void **buf_addr, uint64_t *wsid, int flags);
|
||||
|
||||
// presumably releases the buffer identified by `wsid` — TODO confirm
void release_buffer(uint64_t wsid);
|
||||
|
||||
// presumably writes the I/O address of buffer `wsid` to *ioaddr — TODO confirm
void get_io_address(uint64_t wsid, uint64_t *ioaddr);
|
||||
|
||||
// presumably writes a 64-bit `value` at `offset` of MMIO space `mmio_num` — TODO confirm
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value);
|
||||
|
||||
// presumably reads 64 bits at `offset` of MMIO space `mmio_num` into *value — TODO confirm
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);
|
||||
|
||||
private:
|
||||
|
||||
// Impl is only forward-declared here; its definition is outside this header
class Impl;
|
||||
// raw pointer to the implementation object; ownership is not visible from
// this declaration (presumably created and destroyed by the ctor/dtor)
Impl* impl_;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../../../hw/rtl/fp_cores/fpnew/*"
|
||||
lint_off -file "../rtl/fp_cores/fpnew/*"
|
||||
Reference in New Issue
Block a user