Merge branch 'main' of https://github.com/ucb-bar/radiance
This commit is contained in:
7
.gitmodules
vendored
7
.gitmodules
vendored
@@ -2,5 +2,10 @@
|
||||
path = src/main/resources/vsrc/vortex
|
||||
url = git@github.com:hansungk/vortex-private.git
|
||||
[submodule "radpie"]
|
||||
path = radpie
|
||||
path = cyclotron
|
||||
url = git@github.com:hansungk/radpie.git
|
||||
[submodule "cyclotron-main"]
|
||||
path = cyclotron-main
|
||||
url = https://github.com/hansungk/cyclotron-main.git
|
||||
[submodule "cyclotron"]
|
||||
url = https://github.com/hansungk/cyclotron.git
|
||||
|
||||
1
cyclotron
Submodule
1
cyclotron
Submodule
Submodule cyclotron added at ca6933c4ec
1
cyclotron-main
Submodule
1
cyclotron-main
Submodule
Submodule cyclotron-main added at 25f2e7734b
39
radiance.mk
39
radiance.mk
@@ -3,15 +3,18 @@
|
||||
##############################################################
|
||||
|
||||
VORTEX_SRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc/vortex
|
||||
RADPIE_SRC_DIR = $(base_dir)/generators/radiance/radpie
|
||||
RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
||||
CYCLOTRON_SRC_DIR = $(base_dir)/generators/radiance/cyclotron
|
||||
CYCLOTRON_BUILD_DIR = $(CYCLOTRON_SRC_DIR)/target/debug
|
||||
# CYCLOTRON_BUILD_DIR = $(CYCLOTRON_SRC_DIR)/target/release
|
||||
RADIANCE_CSRC_DIR = $(base_dir)/generators/radiance/src/main/resources/csrc
|
||||
RADIANCE_VSRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc
|
||||
|
||||
##################################################################
|
||||
# THE FOLLOWING MUST BE += operators
|
||||
##################################################################
|
||||
|
||||
# EXTRA_SIM_REQS += radpie
|
||||
# EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
|
||||
EXTRA_SIM_REQS += cyclotron
|
||||
EXTRA_SIM_LDFLAGS += -L$(CYCLOTRON_BUILD_DIR) -Wl,-rpath,$(CYCLOTRON_BUILD_DIR) -lcyclotron
|
||||
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
||||
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
||||
endif
|
||||
@@ -31,21 +34,25 @@ VCS_NONCC_OPTS += +vcs+initreg+random
|
||||
|
||||
# cargo handles building of Rust files all on its own, so make this a PHONY
|
||||
# target to run cargo unconditionally
|
||||
.PHONY: radpie
|
||||
radpie:
|
||||
cd $(RADPIE_SRC_DIR) && cargo build --release
|
||||
.PHONY: cyclotron
|
||||
cyclotron:
|
||||
cd $(CYCLOTRON_SRC_DIR) && cargo build # --release
|
||||
|
||||
EXTRA_SIM_REQS += vortex_vsrc.$(CONFIG)
|
||||
# below manipulation of VORTEX_VLOG_SOURCES doesn't work if we try to reuse
|
||||
# below manipulation of RADIANCE_EXTERNAL_SRCS doesn't work if we try to reuse
|
||||
# $(call lookup_srcs) from common.mk, the variable doesn't expand somehow
|
||||
ifeq ($(shell which fd 2> /dev/null),)
|
||||
VORTEX_VLOG_SOURCES := $(shell find -L $(VORTEX_SRC_DIR) -type f -iname "*.sv" -o -iname "*.vh" -o -iname "*.v")
|
||||
ifeq ($(shell which fdfd 2> /dev/null),)
|
||||
# RADIANCE_EXTERNAL_SRCS := $(shell find -L $(VORTEX_SRC_DIR) -type f -iname "*.sv" -o -iname "*.vh" -o -iname "*.v")
|
||||
# Collect Verilog sources under RADIANCE_VSRC_DIR (following symlinks).
# The -iname alternatives are grouped with \( ... \) so that `-type f` applies
# to all three patterns; without the grouping, find's implicit AND binds
# tighter than -o and only the "*.sv" test would be restricted to regular files.
RADIANCE_EXTERNAL_SRCS := $(shell find -L $(RADIANCE_VSRC_DIR) -type f \( -iname "*.sv" -o -iname "*.vh" -o -iname "*.v" \))
|
||||
RADIANCE_EXTERNAL_SRCS += $(shell find -L $(RADIANCE_CSRC_DIR) -type f)
|
||||
else
|
||||
VORTEX_VLOG_SOURCES := $(shell fd -L -t f -e "sv" -e "vh" -e "v" . $(VORTEX_SRC_DIR))
|
||||
# RADIANCE_EXTERNAL_SRCS := $(shell fdfind -L -t f -e "sv" -e "vh" -e "v" . $(VORTEX_SRC_DIR))
|
||||
RADIANCE_EXTERNAL_SRCS := $(shell fdfind -L -t f -e "sv" -e "vh" -e "v" . $(RADIANCE_VSRC_DIR))
|
||||
RADIANCE_EXTERNAL_SRCS += $(shell fdfind -L -t f . $(RADIANCE_CSRC_DIR))
|
||||
endif
|
||||
# VORTEX_COLLATERAL := $(patsubst $(VORTEX_SRC_DIR)%,$(GEN_COLLATERAL_DIR)%,$(VORTEX_VLOG_SOURCES))
|
||||
# check if expanded
|
||||
# $(info VORTEX_VLOG_SOURCES: $(VORTEX_VLOG_SOURCES))
|
||||
|
||||
# for debug; check if expanded
|
||||
# $(info RADIANCE_EXTERNAL_SRCS: $(RADIANCE_EXTERNAL_SRCS))
|
||||
|
||||
# For every Vortex verilog source file, if there's a matching file in
|
||||
# gen-collateral/, copy them over. This is a hacky way to ensure the changes
|
||||
@@ -53,8 +60,8 @@ endif
|
||||
# necessary when common.mk does not trigger chipyard jar rebuild upon verilog
|
||||
# source updates, in which case we need to manually ensure the up-to-date-ness
|
||||
# of gen-collateral/.
|
||||
vortex_vsrc.$(CONFIG): $(VORTEX_VLOG_SOURCES)
|
||||
@for file in $(VORTEX_VLOG_SOURCES); do \
|
||||
vortex_vsrc.$(CONFIG): $(RADIANCE_EXTERNAL_SRCS)
|
||||
@for file in $(RADIANCE_EXTERNAL_SRCS); do \
|
||||
filename=$$(basename "$$file"); \
|
||||
if [ -f $(GEN_COLLATERAL_DIR)/$$filename ]; then \
|
||||
if ! diff $$file $(GEN_COLLATERAL_DIR)/$$filename &>/dev/null ; then \
|
||||
|
||||
1
radpie
1
radpie
Submodule radpie deleted from 493b8e10a5
43
src/main/resources/csrc/SimEmulator.cc
Normal file
43
src/main/resources/csrc/SimEmulator.cc
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef NO_VPI
|
||||
#include <vpi_user.h>
|
||||
#include <svdpi.h>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" void emulator_init_rs(int num_lanes);
|
||||
extern "C" void emulator_tick_rs(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||
long long *vec_a_address,
|
||||
uint8_t *vec_a_is_store, int *vec_a_size,
|
||||
long long *vec_a_data, uint8_t *vec_d_ready,
|
||||
uint8_t *vec_d_valid, uint8_t *vec_d_is_store,
|
||||
int *vec_d_size, long long *vec_d_data,
|
||||
uint8_t inflight, uint8_t *finished);
|
||||
// extern "C" void emulator_generate_rs(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||
// long long *vec_a_address,
|
||||
// uint8_t *vec_a_is_store, int *vec_a_size,
|
||||
// long long *vec_a_data,
|
||||
// uint8_t *vec_d_ready, uint8_t inflight,
|
||||
// uint8_t *finished);
|
||||
|
||||
extern "C" void emulator_init(int num_lanes) { emulator_init_rs(num_lanes); }
|
||||
|
||||
extern "C" void emulator_tick(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||
long long *vec_a_address, uint8_t *vec_a_is_store,
|
||||
int *vec_a_size, long long *vec_a_data,
|
||||
uint8_t *vec_d_ready, uint8_t *vec_d_valid,
|
||||
uint8_t *vec_d_is_store, int *vec_d_size,
|
||||
long long *vec_d_data, uint8_t inflight,
|
||||
uint8_t *finished) {
|
||||
emulator_tick_rs(vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store,
|
||||
vec_a_size, vec_a_data, vec_d_ready, vec_d_valid,
|
||||
vec_d_is_store, vec_d_size, vec_d_data, inflight, finished);
|
||||
}
|
||||
|
||||
// extern "C" void emulator_generate(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||
// long long *vec_a_address,
|
||||
// uint8_t *vec_a_is_store, int *vec_a_size,
|
||||
// long long *vec_a_data, uint8_t *vec_d_ready,
|
||||
// uint8_t inflight, uint8_t *finished) {
|
||||
// emulator_generate_rs(vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store,
|
||||
// vec_a_size, vec_a_data, vec_d_ready, inflight, finished);
|
||||
// }
|
||||
@@ -2,7 +2,6 @@
|
||||
#include <vpi_user.h>
|
||||
#include <svdpi.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" void memfuzz_init_rs(int num_lanes);
|
||||
|
||||
131
src/main/resources/vsrc/SimEmulator.v
Normal file
131
src/main/resources/vsrc/SimEmulator.v
Normal file
@@ -0,0 +1,131 @@
|
||||
`include "SimDefaults.vh"
|
||||
|
||||
// One-time initialization hook implemented on the C side (SimEmulator.cc).
// The argument is declared `int` to match the C prototype
// `void emulator_init(int num_lanes)`; a DPI `longint` corresponds to a C
// `long long` and would not agree with that prototype.
import "DPI-C" function void emulator_init(
  input int num_lanes
);
|
||||
|
||||
// Make sure to sync the parameters for:
|
||||
// (1) import "DPI-C" declaration
|
||||
// (2) C function declaration
|
||||
// (3) DPI function calls inside initial/always blocks
|
||||
import "DPI-C" function void emulator_tick
|
||||
(
|
||||
input bit vec_a_ready[`MAX_NUM_LANES],
|
||||
output bit vec_a_valid[`MAX_NUM_LANES],
|
||||
output longint vec_a_address[`MAX_NUM_LANES],
|
||||
output bit vec_a_is_store[`MAX_NUM_LANES],
|
||||
output int vec_a_size[`MAX_NUM_LANES],
|
||||
output longint vec_a_data[`MAX_NUM_LANES],
|
||||
|
||||
output bit vec_d_ready[`MAX_NUM_LANES],
|
||||
input bit vec_d_valid[`MAX_NUM_LANES],
|
||||
input bit vec_d_is_store[`MAX_NUM_LANES],
|
||||
input int vec_d_size[`MAX_NUM_LANES],
|
||||
input longint vec_d_data[`MAX_NUM_LANES],
|
||||
|
||||
input bit inflight,
|
||||
output bit finished
|
||||
);
|
||||
|
||||
module SimEmulator #(parameter NUM_LANES = 4) (
|
||||
input clock,
|
||||
input reset,
|
||||
|
||||
input [NUM_LANES-1:0] a_ready,
|
||||
output [NUM_LANES-1:0] a_valid,
|
||||
output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0] a_address,
|
||||
output [NUM_LANES-1:0] a_is_store,
|
||||
output [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] a_size,
|
||||
output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0] a_data,
|
||||
|
||||
output [NUM_LANES-1:0] d_ready,
|
||||
input [NUM_LANES-1:0] d_valid,
|
||||
input [NUM_LANES-1:0] d_is_store,
|
||||
input [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] d_size,
|
||||
input [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0] d_data,
|
||||
// TODO: d_mask
|
||||
|
||||
input inflight,
|
||||
output finished
|
||||
);
|
||||
// "in": C->verilog, "out": verilog->C
|
||||
// need to be in ascending order to match with C indexing
|
||||
// C array sizes are static, so need to use MAX_NUM_LANES
|
||||
bit __out_a_ready [0:`MAX_NUM_LANES-1];
|
||||
bit __in_a_valid [0:`MAX_NUM_LANES-1];
|
||||
longint __in_a_address [0:`MAX_NUM_LANES-1];
|
||||
bit __in_a_is_store [0:`MAX_NUM_LANES-1];
|
||||
int __in_a_size [0:`MAX_NUM_LANES-1];
|
||||
longint __in_a_data [0:`MAX_NUM_LANES-1];
|
||||
bit __in_d_ready [0:`MAX_NUM_LANES-1];
|
||||
bit __out_d_valid [0:`MAX_NUM_LANES-1];
|
||||
bit __out_d_is_store [0:`MAX_NUM_LANES-1];
|
||||
int __out_d_size [0:`MAX_NUM_LANES-1];
|
||||
longint __out_d_data [0:`MAX_NUM_LANES-1];
|
||||
bit __out_inflight;
|
||||
bit __in_finished;
|
||||
|
||||
genvar g;
|
||||
generate
|
||||
for (g = 0; g < NUM_LANES; g = g + 1) begin
|
||||
assign __out_a_ready[g] = a_ready[g];
|
||||
assign a_valid[g] = __in_a_valid[g];
|
||||
assign a_address[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH]
|
||||
= __in_a_address[g][`SIMMEM_DATA_WIDTH-1:0];
|
||||
assign a_is_store[g] = __in_a_is_store[g];
|
||||
assign a_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH]
|
||||
= __in_a_size[g][`SIMMEM_LOGSIZE_WIDTH-1:0];
|
||||
assign a_data[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH]
|
||||
= __in_a_data[g][`SIMMEM_DATA_WIDTH-1:0];
|
||||
assign d_ready[g] = __in_d_ready[g];
|
||||
assign __out_d_valid[g] = d_valid[g];
|
||||
assign __out_d_is_store[g] = d_is_store[g];
|
||||
assign __out_d_size[g] = d_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH];
|
||||
assign __out_d_data[g] = d_data[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH];
|
||||
end
|
||||
assign __out_inflight = inflight;
|
||||
endgenerate
|
||||
assign finished = __in_finished;
|
||||
|
||||
initial begin
|
||||
emulator_init(NUM_LANES);
|
||||
end
|
||||
|
||||
// negedge might make it easier to view waveform since DPI changes are
|
||||
// instant and make it look like they happen before the clockedge
|
||||
always @(posedge clock) begin
|
||||
if (reset) begin
|
||||
for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
|
||||
__in_a_valid[tid] = 1'b0;
|
||||
__in_a_address[tid] = `SIMMEM_DATA_WIDTH'b0;
|
||||
__in_a_is_store[tid] = 1'b0;
|
||||
__in_a_size[tid] = 32'b0;
|
||||
__in_a_data[tid] = `SIMMEM_DATA_WIDTH'b0;
|
||||
__in_d_ready[tid] = 1'b0;
|
||||
end
|
||||
__in_finished = 1'b0;
|
||||
end else begin
|
||||
emulator_tick(
|
||||
__out_a_ready,
|
||||
__in_a_valid,
|
||||
__in_a_address,
|
||||
__in_a_is_store,
|
||||
__in_a_size,
|
||||
__in_a_data,
|
||||
|
||||
__in_d_ready,
|
||||
__out_d_valid,
|
||||
__out_d_is_store,
|
||||
__out_d_size,
|
||||
__out_d_data,
|
||||
|
||||
__out_inflight,
|
||||
__in_finished
|
||||
);
|
||||
// for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
|
||||
// $display("verilog: %04d a_valid[%d]=%d, a_address[%d]=0x%x, d_ready[%d]=%d",
|
||||
// $time, tid, __in_a_valid[tid], tid, __in_a_address[tid], tid, __in_d_ready[tid]);
|
||||
// end
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
@@ -47,7 +47,7 @@ module SimMemFuzzer #(parameter NUM_LANES = 4) (
|
||||
input inflight,
|
||||
output finished
|
||||
);
|
||||
// "in": verilog->C, "out": C->verilog
|
||||
// "in": C->verilog, "out": verilog->C
|
||||
// need to be in ascending order to match with C indexing
|
||||
// C array sizes are static, so need to use MAX_NUM_LANES
|
||||
bit __out_a_ready [0:`MAX_NUM_LANES-1];
|
||||
|
||||
246
src/main/scala/radiance/core/Emulator.scala
Normal file
246
src/main/scala/radiance/core/Emulator.scala
Normal file
@@ -0,0 +1,246 @@
|
||||
package radiance.core
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
|
||||
import radiance.memory.{SourceGenerator, TraceLine, TLPrintf}
|
||||
|
||||
case class SIMTCoreParams(
|
||||
nWarps: Int = 4, // # of warps in the core
|
||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
||||
// cache; relates to the LSU lanes
|
||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
||||
)
|
||||
case class MemtraceCoreParams(
|
||||
tracefilename: String = "undefined",
|
||||
traceHasSource: Boolean = false
|
||||
)
|
||||
|
||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
||||
case object MemtraceCoreKey
|
||||
extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
||||
|
||||
// #############################################################################
|
||||
// FIXME: copy-paste from MemFuzzer
|
||||
// #############################################################################
|
||||
|
||||
class Emulator(
|
||||
numLanes: Int,
|
||||
numSrcIds: Int,
|
||||
wordSizeInBytes: Int,
|
||||
)(implicit p: Parameters)
|
||||
extends LazyModule {
|
||||
val laneNodes = Seq.tabulate(numLanes) { i =>
|
||||
val clientParam = Seq(
|
||||
TLMasterParameters.v1(
|
||||
name = "Emulator" + i.toString,
|
||||
sourceId = IdRange(0, numSrcIds)
|
||||
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
||||
)
|
||||
)
|
||||
TLClientNode(Seq(TLMasterPortParameters.v1(clientParam)))
|
||||
}
|
||||
|
||||
val node = TLIdentityNode()
|
||||
laneNodes.foreach(node := _)
|
||||
|
||||
lazy val module = new EmulatorImp(this, numLanes, numSrcIds, wordSizeInBytes)
|
||||
}
|
||||
|
||||
/** Module implementation of [[Emulator]].
  *
  * Instantiates the `SimEmulator` black box and bridges its flattened,
  * per-lane request/response ports to the TileLink client nodes in
  * `outer.laneNodes`:
  *
  *  - every lane's request coming from the black box is turned into a
  *    word-aligned TileLink Get or Put on that lane's A channel;
  *  - every lane's D-channel response is reported back to the black box
  *    (valid / is_store / size; source, address and data are not forwarded);
  *  - `io.finished` mirrors the black box's `finished` output.
  *
  * @param outer           the enclosing LazyModule providing `laneNodes`
  * @param numLanes        number of lanes (one TileLink client node each)
  * @param numSrcIds       number of source IDs available to each lane
  * @param wordSizeInBytes request granularity; requests smaller than this are
  *                        widened to a full word
  */
class EmulatorImp(
    outer: Emulator,
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int,
) extends LazyModuleImp(outer) {
  val io = IO(new Bundle {
    val finished = Output(Bool())
  })
  val sim = Module(new SimEmulator(numLanes))

  // log2 of the word size; used for the sub-word check, address alignment and
  // the size that sub-word requests are promoted to.
  private val lgWordSize = log2Ceil(wordSizeInBytes)

  sim.io.clock := clock
  sim.io.reset := reset.asBool

  io.finished := sim.io.finished

  // connect Verilog <-> Chisel IO
  // Verilog IO flattened across all lanes
  val laneReqs = Wire(Vec(numLanes, Decoupled(new TraceLine)))
  val addrW = laneReqs(0).bits.address.getWidth
  val sizeW = laneReqs(0).bits.size.getWidth
  val dataW = laneReqs(0).bits.data.getWidth
  laneReqs.zipWithIndex.foreach { case (req, i) =>
    req.valid := sim.io.a.valid(i)
    req.bits.source := 0.U // DPI doesn't generate a source id
    req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
    req.bits.is_store := sim.io.a.is_store(i)
    req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
    req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
  }
  // Single driver of the black box's a.ready: the per-lane req.ready below
  // already combines TileLink A-channel readiness with source-ID availability.
  sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt

  val laneResps = Wire(Vec(numLanes, Flipped(Decoupled(new TraceLine))))
  laneResps.zipWithIndex.foreach { case (resp, i) =>
    resp.ready := sim.io.d.ready(i)
    // TODO: not handled in DPI
    resp.bits.source := DontCare
    resp.bits.address := DontCare
    resp.bits.data := DontCare
  }
  sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
  sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
  sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt
  sim.io.d.data := VecInit(laneResps.map(_.bits.data)).asUInt

  // One source-ID generator per lane
  val sourceGens = Seq.fill(numLanes)(
    Module(
      new SourceGenerator(
        log2Ceil(numSrcIds),
        ignoreInUse = false
      )
    )
  )
  val anyInflight = sourceGens.map(_.io.inflight).reduce(_ || _)
  sim.io.inflight := anyInflight

  // Take requests off of the queue and generate TL requests
  (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
    case ((node, (req, resp)), lane) =>
      val (tlOut, edge) = node.out(0)

      // Requests --------------------------------------------------------------
      //
      // Core only makes accesses of granularity larger than a word, so we want
      // the trace driver to act so as well.
      // That means if req.size is smaller than word size, we need to pad data
      // with zeros to generate a word-size request, and set mask accordingly.
      val offsetInWord = req.bits.address % wordSizeInBytes.U
      val subword = req.bits.size < lgWordSize.U

      // `mask` is currently unused
      // val mask = Wire(UInt(wordSizeInBytes.W))
      val wordData = Wire(UInt((wordSizeInBytes * 8 * 2).W))
      val sizeInBytes = Wire(UInt((sizeW + 1).W))
      sizeInBytes := (1.U) << req.bits.size
      // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
      wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
      val wordAlignedAddress =
        req.bits.address & ~((1 << lgWordSize) - 1).U(addrW.W)
      // Sub-word requests are promoted to exactly one word; derive that size
      // from wordSizeInBytes instead of assuming 4-byte words.
      val wordAlignedSize = Mux(subword, lgWordSize.U, req.bits.size)

      val sourceGen = sourceGens(lane)
      sourceGen.io.gen := tlOut.a.fire
      sourceGen.io.reclaim.valid := tlOut.d.fire
      sourceGen.io.reclaim.bits := tlOut.d.bits.source
      sourceGen.io.meta := DontCare

      val (plegal, pbits) = edge.Put(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize, // trace line already holds log2(size)
        // data should be aligned to beatBytes
        data =
          (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
      )
      val (glegal, gbits) = edge.Get(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize
      )
      val legal = Mux(req.bits.is_store, plegal, glegal)
      val bits = Mux(req.bits.is_store, pbits, gbits)

      // A request is only issued / accepted while a source ID is available.
      tlOut.a.valid := req.valid && sourceGen.io.id.valid
      req.ready := tlOut.a.ready && sourceGen.io.id.valid

      when(tlOut.a.fire) {
        assert(legal, "illegal TL req gen")
      }
      tlOut.a.bits := bits

      // Responses -------------------------------------------------------------
      //
      tlOut.d.ready := resp.ready
      resp.valid := tlOut.d.valid
      resp.bits.is_store := !edge.hasData(tlOut.d.bits)
      resp.bits.size := tlOut.d.bits.size

      // Channels B/C/E are unused by this client: sink B, never drive C or E.
      tlOut.b.ready := true.B
      tlOut.c.valid := false.B
      tlOut.e.valid := false.B

      // debug
      dontTouch(req)
      when(tlOut.a.valid) {
        printf(s"Lane ${lane}: ");
        TLPrintf(
          "Emulator",
          tlOut.a.bits.source,
          tlOut.a.bits.address,
          tlOut.a.bits.size,
          tlOut.a.bits.mask,
          req.bits.is_store,
          tlOut.a.bits.data,
          req.bits.data
        )
      }
      dontTouch(tlOut.a)
      dontTouch(tlOut.d)
  }

  // when(traceFinished && allReqReclaimed && noValidReqs) {
  //   assert(
  //     false.B,
  //     "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
  //   )
  // }
}
|
||||
|
||||
class SimEmulator(numLanes: Int)
|
||||
extends BlackBox(Map("NUM_LANES" -> numLanes))
|
||||
with HasBlackBoxResource {
|
||||
val traceLineT = new TraceLine
|
||||
val addrW = traceLineT.address.getWidth
|
||||
val sizeW = traceLineT.size.getWidth
|
||||
val dataW = traceLineT.data.getWidth
|
||||
val io = IO(new Bundle {
|
||||
val clock = Input(Clock())
|
||||
val reset = Input(Bool())
|
||||
val inflight = Input(Bool())
|
||||
val finished = Output(Bool())
|
||||
|
||||
val a =
|
||||
new Bundle {
|
||||
val ready = Input(UInt(numLanes.W))
|
||||
val valid = Output(UInt(numLanes.W))
|
||||
// Chisel can't interface with Verilog 2D port, so flatten all lanes into
|
||||
// single wide 1D array.
|
||||
val address = Output(UInt((addrW * numLanes).W))
|
||||
val is_store = Output(UInt(numLanes.W))
|
||||
val size = Output(UInt((sizeW * numLanes).W))
|
||||
val data = Output(UInt((dataW * numLanes).W))
|
||||
}
|
||||
val d =
|
||||
new Bundle {
|
||||
val ready = Output(UInt(numLanes.W))
|
||||
val valid = Input(UInt(numLanes.W))
|
||||
val is_store = Input(UInt(numLanes.W))
|
||||
val size = Input(UInt((sizeW * numLanes).W))
|
||||
val data = Input(UInt((dataW * numLanes).W))
|
||||
}
|
||||
})
|
||||
|
||||
addResource("/vsrc/SimDefaults.vh")
|
||||
addResource("/vsrc/SimEmulator.v")
|
||||
addResource("/csrc/SimEmulator.cc")
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ class DotProductPipe(dim: Int, inputType: tile.FType, outputType: tile.FType) ex
|
||||
val mulSigWidth = m.io.rawOut.sigWidth
|
||||
val roundRawFNToRecFN =
|
||||
Module(new hardfloat.RoundAnyRawFNToRecFN(
|
||||
mulExpWidth, mulSigWidth, outExpWidth, outSigWidth, 0))
|
||||
mulExpWidth, mulSigWidth, expWidth, sigWidth, 0))
|
||||
roundRawFNToRecFN.io.invalidExc := m.io.invalidExc
|
||||
roundRawFNToRecFN.io.infiniteExc := false.B
|
||||
roundRawFNToRecFN.io.in := m.io.rawOut
|
||||
@@ -169,7 +169,7 @@ class DotProductPipe(dim: Int, inputType: tile.FType, outputType: tile.FType) ex
|
||||
|
||||
// instantiate wires for input values to each reduction pipeline stage
|
||||
val interim = (log2Dim to 0 by -1).map { i =>
|
||||
Wire(Valid(Vec(1 << i, Bits(recOutFLen.W))))
|
||||
Wire(Valid(Vec(1 << i, Bits(recInFLen.W))))
|
||||
}
|
||||
// instantiate wires for pipe registers for C
|
||||
val interimC = (log2Dim to 0 by -1).map( _ => Wire(Valid(Bits(recOutFLen.W))) )
|
||||
@@ -186,7 +186,7 @@ class DotProductPipe(dim: Int, inputType: tile.FType, outputType: tile.FType) ex
|
||||
require(inputs.bits.length == 2 * outputs.bits.length)
|
||||
val thisDim = inputs.bits.length
|
||||
val adders = Seq.fill(thisDim / 2)(
|
||||
Module(new hardfloat.AddRecFN(outExpWidth, outSigWidth))
|
||||
Module(new hardfloat.AddRecFN(expWidth, sigWidth))
|
||||
)
|
||||
val addOuts = adders.zipWithIndex.map { case (a, i) =>
|
||||
a.io.subOp := 0.U // FIXME dont know what this is
|
||||
@@ -212,9 +212,15 @@ class DotProductPipe(dim: Int, inputType: tile.FType, outputType: tile.FType) ex
|
||||
// add stages end ------------------------------------------------------------
|
||||
|
||||
// add final A and B dot-product result to accumulator C
|
||||
val conv = Module(new hardfloat.RecFNToRecFN(expWidth, sigWidth, outExpWidth, outSigWidth))
|
||||
conv.io.in := addStageOut.bits(0)
|
||||
conv.io.roundingMode := hardfloat.consts.round_near_even
|
||||
conv.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
||||
// assert(conv.io.exceptionFlags === 0.U)
|
||||
|
||||
val acc = Module(new hardfloat.AddRecFN(outExpWidth, outSigWidth))
|
||||
acc.io.subOp := 0.U // FIXME
|
||||
acc.io.a := addStageOut.bits(0)
|
||||
acc.io.a := conv.io.out
|
||||
acc.io.b := addStageC.bits
|
||||
acc.io.roundingMode := hardfloat.consts.round_near_even
|
||||
acc.io.detectTininess := hardfloat.consts.tininess_afterRounding
|
||||
|
||||
@@ -4,6 +4,7 @@ import freechips.rocketchip.diplomacy.LazyModule
|
||||
import freechips.rocketchip.subsystem._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import freechips.rocketchip.tilelink._
|
||||
import radiance.core.{SIMTCoreKey, MemtraceCoreKey}
|
||||
|
||||
// TODO: possibly move to somewhere closer to CoalescingUnit
|
||||
// TODO: separate coalescer config from CanHaveMemtraceCore
|
||||
|
||||
@@ -10,25 +10,10 @@ import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
||||
import freechips.rocketchip.unittest._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import radiance.core.{SIMTCoreParams, SIMTCoreKey}
|
||||
|
||||
// TODO: find better place for these
|
||||
|
||||
case class SIMTCoreParams(
|
||||
nWarps: Int = 4, // # of warps in the core
|
||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
||||
// cache; relates to the LSU lanes
|
||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
||||
)
|
||||
case class MemtraceCoreParams(
|
||||
tracefilename: String = "undefined",
|
||||
traceHasSource: Boolean = false
|
||||
)
|
||||
case class CoalXbarParam()
|
||||
|
||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
||||
case object MemtraceCoreKey
|
||||
extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
||||
case object CoalescerKey
|
||||
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
||||
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
||||
@@ -2055,7 +2040,7 @@ class MemFuzzer(
|
||||
val laneNodes = Seq.tabulate(numLanes) { i =>
|
||||
val clientParam = Seq(
|
||||
TLMasterParameters.v1(
|
||||
name = "MemTraceDriver" + i.toString,
|
||||
name = "MemFuzzer" + i.toString,
|
||||
sourceId = IdRange(0, numSrcIds)
|
||||
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
||||
)
|
||||
|
||||
@@ -12,6 +12,7 @@ import freechips.rocketchip.subsystem._
|
||||
import gemmini._
|
||||
import gemmini.Arithmetic.FloatArithmetic._
|
||||
import radiance.tile._
|
||||
import radiance.core._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||
|
||||
@@ -106,6 +107,44 @@ class WithRadianceCores(
|
||||
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
||||
}
|
||||
|
||||
/** Config fragment that attaches `n` emulator tiles to the subsystem.
  *
  * The new tiles receive consecutive tile IDs starting at the current value of
  * `NumTiles` and are prepended to the tiles already located in the subsystem.
  * `NumTiles` and `NumRadianceCores` are advanced by `n` so that the counters
  * stay in step with the IDs handed out here; a fragment applied afterwards
  * therefore starts at the first unused ID.
  *
  * @param n          number of emulator tiles to add
  * @param useVxCache forwarded to `EmulatorTileParams.useVxCache`
  */
class WithEmulatorCores(
  n: Int,
  useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val emulator = EmulatorTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => EmulatorTileAttachParams(
      emulator.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // n tiles were added above, so both counters grow by n (not by 1).
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||
|
||||
/** Config fragment that attaches `n` fuzzer tiles to the subsystem.
  *
  * The new tiles receive consecutive tile IDs starting at the current value of
  * `NumTiles` and are prepended to the tiles already located in the subsystem.
  * `NumTiles` and `NumRadianceCores` are advanced by `n` so that the counters
  * stay in step with the IDs handed out here; a fragment applied afterwards
  * therefore starts at the first unused ID.
  *
  * @param n          number of fuzzer tiles to add
  * @param useVxCache forwarded to `FuzzerTileParams.useVxCache`
  */
class WithFuzzerCores(
  n: Int,
  useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val fuzzer = FuzzerTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => FuzzerTileAttachParams(
      fuzzer.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // n tiles were added above, so both counters grow by n (not by 1).
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||
|
||||
object RadianceGemminiDataType extends Enumeration {
|
||||
type Type = Value
|
||||
val FP32, FP16, BF16, Int8 = Value
|
||||
@@ -244,25 +283,6 @@ class WithRadianceFrameBuffer(baseAddress: BigInt,
|
||||
}
|
||||
})
|
||||
|
||||
class WithFuzzerCores(
|
||||
n: Int,
|
||||
useVxCache: Boolean
|
||||
) extends Config((site, _, up) => {
|
||||
case TilesLocated(InSubsystem) => {
|
||||
val prev = up(TilesLocated(InSubsystem))
|
||||
val idOffset = up(NumTiles)
|
||||
val fuzzer = FuzzerTileParams(
|
||||
core = VortexCoreParams(),
|
||||
useVxCache = useVxCache)
|
||||
List.tabulate(n)(i => FuzzerTileAttachParams(
|
||||
fuzzer.copy(tileId = i + idOffset),
|
||||
RocketCrossingParams()
|
||||
)) ++ prev
|
||||
}
|
||||
case NumTiles => up(NumTiles) + 1
|
||||
case NumRadianceCores => up(NumRadianceCores) + 1
|
||||
})
|
||||
|
||||
class WithRadianceCluster(
|
||||
clusterId: Int,
|
||||
location: HierarchicalLocation = InSubsystem,
|
||||
|
||||
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
@@ -0,0 +1,96 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
// See LICENSE.Berkeley for license details.
|
||||
|
||||
package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||
import freechips.rocketchip.resources.SimpleDevice
|
||||
import freechips.rocketchip.prci.ClockCrossingType
|
||||
import freechips.rocketchip.rocket._
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||
import radiance.core._
|
||||
import radiance.memory.{CoalescingUnit, CoalescerKey}
|
||||
|
||||
// TODO: De-duplicate between this and FuzzerTile
|
||||
|
||||
case class EmulatorTileParams(
|
||||
core: VortexCoreParams = VortexCoreParams(), // TODO: remove this
|
||||
useVxCache: Boolean = false,
|
||||
tileId: Int = 0,
|
||||
) extends InstantiableTileParams[EmulatorTile] {
|
||||
def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(
|
||||
implicit p: Parameters
|
||||
): EmulatorTile = {
|
||||
new EmulatorTile(this, crossing, lookup)
|
||||
}
|
||||
val clockSinkParams = ClockSinkParameters()
|
||||
val blockerCtrlAddr = None
|
||||
val icache = None
|
||||
val dcache = None
|
||||
val btb = None
|
||||
val baseName = "radiance_emulator_tile"
|
||||
val uniqueName = s"${baseName}_$tileId"
|
||||
}
|
||||
|
||||
case class EmulatorTileAttachParams(
|
||||
tileParams: EmulatorTileParams,
|
||||
crossingParams: HierarchicalElementCrossingParamsLike
|
||||
) extends CanAttachTile { type TileType = EmulatorTile }
|
||||
|
||||
class EmulatorTile private (
|
||||
val EmulatorParams: EmulatorTileParams,
|
||||
crossing: ClockCrossingType,
|
||||
lookup: LookupByHartIdImpl,
|
||||
q: Parameters
|
||||
) extends BaseTile(EmulatorParams, crossing, lookup, q)
|
||||
with SinksExternalInterrupts
|
||||
with SourcesExternalNotifications {
|
||||
def this(
|
||||
params: EmulatorTileParams,
|
||||
crossing: HierarchicalElementCrossingParamsLike,
|
||||
lookup: LookupByHartIdImpl
|
||||
)(implicit p: Parameters) =
|
||||
this(params, crossing.crossingType, lookup, p)
|
||||
|
||||
val cpuDevice: SimpleDevice = new SimpleDevice("emulator", Nil)
|
||||
|
||||
val intOutwardNode = None
|
||||
val slaveNode: TLInwardNode = TLIdentityNode()
|
||||
val masterNode = visibilityNode
|
||||
// val statusNode = BundleBridgeSource(() => new GroundTestStatus)
|
||||
|
||||
val (numLanes, numSrcIds) = p(SIMTCoreKey) match {
|
||||
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
||||
case None => {
|
||||
require(false, "emulator requires SIMTCoreKey to be defined")
|
||||
(0, 0)
|
||||
}
|
||||
}
|
||||
// FIXME: parameterize
|
||||
val wordSizeInBytes = 4
|
||||
|
||||
val emulator = LazyModule(new Emulator(numLanes, numSrcIds, wordSizeInBytes))
|
||||
|
||||
// Conditionally instantiate memory coalescer
|
||||
val coalescerNode = p(CoalescerKey) match {
|
||||
case Some(coalParam) => {
|
||||
val coal = LazyModule(new CoalescingUnit(coalParam))
|
||||
coal.cpuNode :=* TLWidthWidget(4) :=* emulator.node
|
||||
coal.aggregateNode
|
||||
}
|
||||
case None => emulator.node
|
||||
}
|
||||
|
||||
masterNode :=* coalescerNode
|
||||
|
||||
override lazy val module = new EmulatorTileModuleImp(this)
|
||||
}
|
||||
|
||||
class EmulatorTileModuleImp(outer: EmulatorTile) extends BaseTileModuleImp(outer) {
|
||||
outer.reportCease(Some(outer.emulator.module.io.finished))
|
||||
}
|
||||
@@ -4,14 +4,16 @@
|
||||
package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import org.chipsalliance.cde.config.{Parameters}
|
||||
import freechips.rocketchip.diplomacy.{SimpleDevice, LazyModule}
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||
import freechips.rocketchip.resources.SimpleDevice
|
||||
import freechips.rocketchip.prci.ClockCrossingType
|
||||
import freechips.rocketchip.rocket._
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||
import radiance.core.{SIMTCoreKey}
|
||||
import radiance.memory._
|
||||
|
||||
case class FuzzerTileParams(
|
||||
|
||||
@@ -19,6 +19,7 @@ import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
import org.chipsalliance.cde.config._
|
||||
import radiance.core._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ class TensorCoreDecoupledTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||
behavior of "TensorCoreDecoupled"
|
||||
|
||||
it should "do the right thing" in {
|
||||
test(new TensorCoreDecoupled(8, 8, numSourceIds = 4, tilingParams = TensorTilingParams()))
|
||||
test(new TensorCoreDecoupled(8, 8, numSourceIds = 4, half = true))
|
||||
{ c =>
|
||||
c.io.initiate.valid.poke(true.B)
|
||||
c.io.initiate.bits.wid.poke(0.U)
|
||||
|
||||
Reference in New Issue
Block a user