Add EmulatorTile
Also split the core-specific config keys out of radiance.memory into radiance.core.
This commit is contained in:
31
src/main/resources/csrc/SimEmulator.cc
Normal file
31
src/main/resources/csrc/SimEmulator.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef NO_VPI
|
||||
#include <vpi_user.h>
|
||||
#include <svdpi.h>
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" void emulator_init_rs(int num_lanes);
|
||||
|
||||
extern "C" void emulator_generate_rs(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||
long long *vec_a_address,
|
||||
uint8_t *vec_a_is_store, int *vec_a_size,
|
||||
long long *vec_a_data, uint8_t *vec_d_ready,
|
||||
uint8_t *vec_d_valid,
|
||||
uint8_t *vec_d_is_store, int *vec_d_size,
|
||||
uint8_t inflight, uint8_t *finished);
|
||||
|
||||
// DPI-C entry point for one-time emulator setup.
// Hands the lane count through, unchanged, to emulator_init_rs (declared
// above; its definition is not part of this translation unit).
extern "C" void emulator_init(int num_lanes) { emulator_init_rs(num_lanes); }
|
||||
|
||||
// DPI-C entry point called to exchange one round of per-lane signals.
// Every argument is forwarded as-is, in the same order, to
// emulator_generate_rs (declared above; defined outside this file).
//
// The vec_* pointers refer to per-lane arrays owned by the caller; this
// wrapper neither reads nor writes them itself.
extern "C" void emulator_generate(
    // A channel (request side)
    uint8_t *vec_a_ready,
    uint8_t *vec_a_valid,
    long long *vec_a_address,
    uint8_t *vec_a_is_store,
    int *vec_a_size,
    long long *vec_a_data,
    // D channel (response side)
    uint8_t *vec_d_ready,
    uint8_t *vec_d_valid,
    uint8_t *vec_d_is_store,
    int *vec_d_size,
    // Scalars
    uint8_t inflight,
    uint8_t *finished) {
  emulator_generate_rs(
      vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store, vec_a_size,
      vec_a_data,
      vec_d_ready, vec_d_valid, vec_d_is_store, vec_d_size,
      inflight, finished);
}
|
||||
132
src/main/resources/vsrc/SimEmulator.v
Normal file
132
src/main/resources/vsrc/SimEmulator.v
Normal file
@@ -0,0 +1,132 @@
|
||||
`include "SimDefaults.vh"
|
||||
|
||||
// One-time initialization of the C-side emulator model.
//
// The C definition is `void emulator_init(int num_lanes)`. A C `int` pairs
// with the 32-bit SystemVerilog `int`; `longint` is 64 bits wide and pairs
// with C `long long`, so declaring the argument as `longint` here would
// disagree with the C prototype.
import "DPI-C" function void emulator_init(
  input int num_lanes
);
|
||||
|
||||
// Make sure to sync the parameters for:
|
||||
// (1) import "DPI-C" declaration
|
||||
// (2) C function declaration
|
||||
// (3) DPI function calls inside initial/always blocks
|
||||
import "DPI-C" function void emulator_generate
|
||||
(
|
||||
input bit vec_a_ready[`MAX_NUM_LANES],
|
||||
output bit vec_a_valid[`MAX_NUM_LANES],
|
||||
output longint vec_a_address[`MAX_NUM_LANES],
|
||||
output bit vec_a_is_store[`MAX_NUM_LANES],
|
||||
output int vec_a_size[`MAX_NUM_LANES],
|
||||
output longint vec_a_data[`MAX_NUM_LANES],
|
||||
|
||||
output bit vec_d_ready[`MAX_NUM_LANES],
|
||||
input bit vec_d_valid[`MAX_NUM_LANES],
|
||||
input bit vec_d_is_store[`MAX_NUM_LANES],
|
||||
input int vec_d_size[`MAX_NUM_LANES],
|
||||
|
||||
input bit inflight,
|
||||
output bit finished
|
||||
);
|
||||
|
||||
// Simulation-only bridge between a flattened per-lane request/response
// interface and the DPI-C emulator model (emulator_init / emulator_generate).
//
// Ports carry all lanes packed into one vector per signal; internally each
// signal is unpacked into a MAX_NUM_LANES-sized array so it can be passed to
// the DPI functions, whose array sizes are fixed at MAX_NUM_LANES.  Only the
// first NUM_LANES entries are wired to ports.
module SimEmulator #(parameter NUM_LANES = 4) (
  input clock,
  input reset,

  input  [NUM_LANES-1:0]                       a_ready,
  output [NUM_LANES-1:0]                       a_valid,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_address,
  output [NUM_LANES-1:0]                       a_is_store,
  output [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] a_size,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_data,

  output [NUM_LANES-1:0]                       d_ready,
  input  [NUM_LANES-1:0]                       d_valid,
  input  [NUM_LANES-1:0]                       d_is_store,
  input  [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] d_size,
  // TODO: d_mask
  // TODO: d_data

  input  inflight,
  output finished
);
  // "in": C->verilog, "out": verilog->C
  // need to be in ascending order to match with C indexing
  // C array sizes are static, so need to use MAX_NUM_LANES
  bit     __out_a_ready    [0:`MAX_NUM_LANES-1];
  bit     __in_a_valid     [0:`MAX_NUM_LANES-1];
  longint __in_a_address   [0:`MAX_NUM_LANES-1];
  bit     __in_a_is_store  [0:`MAX_NUM_LANES-1];
  int     __in_a_size      [0:`MAX_NUM_LANES-1];
  longint __in_a_data      [0:`MAX_NUM_LANES-1];
  bit     __in_d_ready     [0:`MAX_NUM_LANES-1];
  bit     __out_d_valid    [0:`MAX_NUM_LANES-1];
  bit     __out_d_is_store [0:`MAX_NUM_LANES-1];
  int     __out_d_size     [0:`MAX_NUM_LANES-1];
  bit     __out_inflight;
  bit     __in_finished;

  // Pack/unpack between the flattened ports and the per-lane DPI arrays.
  genvar g;
  generate
    for (g = 0; g < NUM_LANES; g = g + 1) begin
      assign __out_a_ready[g] = a_ready[g];
      assign a_valid[g] = __in_a_valid[g];
      assign a_address[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH]
        = __in_a_address[g][`SIMMEM_DATA_WIDTH-1:0];
      assign a_is_store[g] = __in_a_is_store[g];
      assign a_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH]
        = __in_a_size[g][`SIMMEM_LOGSIZE_WIDTH-1:0];
      assign a_data[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH]
        = __in_a_data[g][`SIMMEM_DATA_WIDTH-1:0];
      assign d_ready[g] = __in_d_ready[g];
      assign __out_d_valid[g] = d_valid[g];
      assign __out_d_is_store[g] = d_is_store[g];
      assign __out_d_size[g] = d_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH];
    end
    assign __out_inflight = inflight;
  endgenerate
  assign finished = __in_finished;

  initial begin
    emulator_init(NUM_LANES);
  end

  // negedge is important here; the DPI logic is essentially functioning as
  // a combinational logic, so we want to reflect the signal change from DPI
  // at the *current* cycle, not the next.
  always @(negedge clock) begin
    if (reset) begin
      for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
        __in_a_valid[tid] = 1'b0;
        __in_a_address[tid] = `SIMMEM_DATA_WIDTH'b0;
        __in_a_is_store[tid] = 1'b0;
        __in_a_size[tid] = 32'b0;
        __in_a_data[tid] = `SIMMEM_DATA_WIDTH'b0;
        __in_d_ready[tid] = 1'b0;
      end
      __in_finished = 1'b0;
    end else begin
      emulator_generate(
        __out_a_ready,
        __in_a_valid,
        __in_a_address,
        __in_a_is_store,
        __in_a_size,
        __in_a_data,

        __in_d_ready,
        __out_d_valid,
        __out_d_is_store,
        __out_d_size,

        __out_inflight,
        __in_finished
      );
      for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
        $display("verilog: %04d a_valid[%d]=%d, a_address[%d]=0x%x, d_ready[%d]=%d",
          $time, tid, __in_a_valid[tid], tid, __in_a_address[tid], tid, __in_d_ready[tid]);
      end

      // Test the variable the DPI call just wrote rather than the `finished`
      // port: the port is driven by a continuous assignment that has not yet
      // been re-evaluated at this point in the block, so it would still hold
      // the previous cycle's value and delay termination by one clock.
      if (__in_finished) begin
        $finish;
      end
    end
  end
endmodule
|
||||
243
src/main/scala/radiance/core/Emulator.scala
Normal file
243
src/main/scala/radiance/core/Emulator.scala
Normal file
@@ -0,0 +1,243 @@
|
||||
package radiance.core
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
|
||||
import radiance.memory.{SourceGenerator, TraceLine, TLPrintf}
|
||||
|
||||
/** Sizing parameters for a SIMT core.
  *
  * @param nWarps     number of warps in the core
  * @param nCoreLanes number of SIMT threads in the core
  * @param nMemLanes  number of memory lanes in the memory interface to the
  *                   cache; relates to the LSU lanes
  * @param nSrcIds    number of source IDs allocated to each of the nMemLanes
  */
case class SIMTCoreParams(
    nWarps: Int = 4,
    nCoreLanes: Int = 4,
    nMemLanes: Int = 4,
    nSrcIds: Int = 8
)
|
||||
/** Parameters for a memory-trace-driven core.
  *
  * @param tracefilename  trace file name; defaults to the placeholder
  *                       string "undefined"
  * @param traceHasSource presumably whether trace lines carry a source ID
  *                       -- TODO confirm against the trace reader
  */
case class MemtraceCoreParams(
    tracefilename: String = "undefined",
    traceHasSource: Boolean = false
)
|
||||
|
||||
/** Config key holding the optional [[SIMTCoreParams]]; unset (None) by default. */
case object SIMTCoreKey
    extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
||||
/** Config key holding the optional [[MemtraceCoreParams]]; unset (None) by default. */
case object MemtraceCoreKey
    extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
||||
|
||||
// #############################################################################
|
||||
// FIXME: copy-paste from MemFuzzer
|
||||
// #############################################################################
|
||||
|
||||
/** Diplomatic wrapper around the DPI-backed emulator.
  *
  * Creates one TileLink client node per lane, each owning source IDs
  * `[0, numSrcIds)`, and funnels all of them through a single identity node
  * (`node`) for downstream connection.
  *
  * @param numLanes        number of per-lane TileLink client nodes to create
  * @param numSrcIds       number of source IDs given to each lane
  * @param wordSizeInBytes word size handed on to [[EmulatorImp]]
  */
class Emulator(
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int,
)(implicit p: Parameters)
    extends LazyModule {
  val laneNodes = Seq.tabulate(numLanes) { lane =>
    val master = TLMasterParameters.v1(
      name = s"Emulator$lane",
      sourceId = IdRange(0, numSrcIds)
      // visibility = Seq(AddressSet(0x0000, 0xffffff))
    )
    val port = TLMasterPortParameters.v1(Seq(master))
    TLClientNode(Seq(port))
  }

  // Single attachment point that fans in every lane node.
  val node = TLIdentityNode()
  for (laneNode <- laneNodes) {
    node := laneNode
  }

  lazy val module = new EmulatorImp(this, numLanes, numSrcIds, wordSizeInBytes)
}
|
||||
|
||||
/** Module implementation of [[Emulator]].
  *
  * Instantiates the `SimEmulator` black box, unpacks its flattened per-lane
  * request signals into `TraceLine` requests, turns each request into a
  * TileLink Get/Put on the corresponding lane node, and feeds TileLink D
  * responses back to the black box.
  *
  * @param outer           the enclosing LazyModule, providing the lane nodes
  * @param numLanes        number of lanes
  * @param numSrcIds       number of source IDs per lane
  * @param wordSizeInBytes minimum access granularity; sub-word requests are
  *                        widened to this size
  */
class EmulatorImp(
    outer: Emulator,
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int,
) extends LazyModuleImp(outer) {
  val io = IO(new Bundle {
    val finished = Output(Bool())
  })
  val sim = Module(new SimEmulator(numLanes))
  sim.io.clock := clock
  sim.io.reset := reset.asBool

  io.finished := sim.io.finished

  // log2 of the word size, used both for the sub-word test and as the lgSize
  // of widened requests.
  private val lgWordSize = log2Ceil(wordSizeInBytes)

  // connect Verilog <-> Chisel IO
  // Verilog IO flattened across all lanes
  val laneReqs = Wire(Vec(numLanes, Decoupled(new TraceLine)))
  val addrW = laneReqs(0).bits.address.getWidth
  val sizeW = laneReqs(0).bits.size.getWidth
  val dataW = laneReqs(0).bits.data.getWidth
  laneReqs.zipWithIndex.foreach { case (req, i) =>
    req.valid := sim.io.a.valid(i)
    req.bits.source := 0.U // DPI doesn't generate a source id
    req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
    req.bits.is_store := sim.io.a.is_store(i)
    req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
    req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
  }
  // a.ready is driven exactly once, from the per-lane request ready, so that
  // it reflects both TileLink readiness and source-ID availability.
  sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt

  val laneResps = Wire(Vec(numLanes, Flipped(Decoupled(new TraceLine))))
  laneResps.zipWithIndex.foreach { case (resp, i) =>
    resp.ready := sim.io.d.ready(i)
    // TODO: not handled in DPI
    resp.bits.source := DontCare
    resp.bits.address := DontCare
    resp.bits.data := DontCare
  }
  sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
  sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
  sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt

  val sourceGens = Seq.fill(numLanes)(
    Module(
      new SourceGenerator(
        log2Ceil(numSrcIds),
        ignoreInUse = false
      )
    )
  )
  val anyInflight = sourceGens.map(_.io.inflight).reduce(_ || _)
  sim.io.inflight := anyInflight

  // Take requests off of the queue and generate TL requests
  (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
    case ((node, (req, resp)), lane) =>
      val (tlOut, edge) = node.out(0)

      // Requests --------------------------------------------------------------
      //
      // Core only makes accesses of granularity larger than a word, so we want
      // the trace driver to act so as well.
      // That means if req.size is smaller than word size, we need to pad data
      // with zeros to generate a word-size request, and set mask accordingly.
      val offsetInWord = req.bits.address % wordSizeInBytes.U
      val subword = req.bits.size < lgWordSize.U

      // `mask` is currently unused
      // val mask = Wire(UInt(wordSizeInBytes.W))
      val wordData = Wire(UInt((wordSizeInBytes * 8 * 2).W))
      val sizeInBytes = Wire(UInt((sizeW + 1).W))
      sizeInBytes := (1.U) << req.bits.size
      // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
      wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
      val wordAlignedAddress =
        req.bits.address & ~((1 << lgWordSize) - 1).U(addrW.W)
      // Sub-word requests are widened to exactly one word; derive that size
      // from wordSizeInBytes instead of hard-coding log2(4).
      val wordAlignedSize = Mux(subword, lgWordSize.U, req.bits.size)

      val sourceGen = sourceGens(lane)
      sourceGen.io.gen := tlOut.a.fire
      sourceGen.io.reclaim.valid := tlOut.d.fire
      sourceGen.io.reclaim.bits := tlOut.d.bits.source
      sourceGen.io.meta := DontCare

      val (plegal, pbits) = edge.Put(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize, // trace line already holds log2(size)
        // data should be aligned to beatBytes
        data =
          (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
      )
      val (glegal, gbits) = edge.Get(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize
      )
      val legal = Mux(req.bits.is_store, plegal, glegal)
      val bits = Mux(req.bits.is_store, pbits, gbits)

      // A request is only issued while a free source ID is available.
      tlOut.a.valid := req.valid && sourceGen.io.id.valid
      req.ready := tlOut.a.ready && sourceGen.io.id.valid

      when(tlOut.a.fire) {
        assert(legal, "illegal TL req gen")
      }
      tlOut.a.bits := bits

      // Responses -------------------------------------------------------------
      //
      tlOut.d.ready := resp.ready
      resp.valid := tlOut.d.valid
      resp.bits.is_store := !edge.hasData(tlOut.d.bits)
      resp.bits.size := tlOut.d.bits.size

      tlOut.b.ready := true.B
      tlOut.c.valid := false.B
      tlOut.e.valid := false.B

      // debug
      dontTouch(req)
      when(tlOut.a.valid) {
        printf(s"Lane ${lane}: ");
        TLPrintf(
          "Emulator",
          tlOut.a.bits.source,
          tlOut.a.bits.address,
          tlOut.a.bits.size,
          tlOut.a.bits.mask,
          req.bits.is_store,
          tlOut.a.bits.data,
          req.bits.data
        )
      }
      dontTouch(tlOut.a)
      dontTouch(tlOut.d)
  }

  // when(traceFinished && allReqReclaimed && noValidReqs) {
  //   assert(
  //     false.B,
  //     "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
  //   )
  // }
}
|
||||
|
||||
/** Chisel black-box binding for the `SimEmulator` Verilog module.
  *
  * The Verilog `NUM_LANES` parameter is set to `numLanes`.  All per-lane
  * signals are flattened into single wide `UInt`s because Chisel can't
  * interface with a Verilog 2D port.
  *
  * @param numLanes number of lanes carried on each flattened port
  */
class SimEmulator(numLanes: Int)
    extends BlackBox(Map("NUM_LANES" -> numLanes))
    with HasBlackBoxResource {
  // Per-lane field widths are taken from a TraceLine instance.
  val traceLineT = new TraceLine
  val addrW = traceLineT.address.getWidth
  val sizeW = traceLineT.size.getWidth
  val dataW = traceLineT.data.getWidth

  // UInt type wide enough to hold `bitsPerLane` bits for every lane.
  private def flattened(bitsPerLane: Int) = UInt((bitsPerLane * numLanes).W)

  val io = IO(new Bundle {
    val clock = Input(Clock())
    val reset = Input(Bool())
    val inflight = Input(Bool())
    val finished = Output(Bool())

    // Request side: driven by the black box, except for `ready`.
    val a =
      new Bundle {
        val ready = Input(flattened(1))
        val valid = Output(flattened(1))
        val address = Output(flattened(addrW))
        val is_store = Output(flattened(1))
        val size = Output(flattened(sizeW))
        val data = Output(flattened(dataW))
      }
    // Response side: consumed by the black box, except for `ready`.
    val d =
      new Bundle {
        val ready = Output(flattened(1))
        val valid = Input(flattened(1))
        val is_store = Input(flattened(1))
        val size = Input(flattened(sizeW))
      }
  })

  addResource("/vsrc/SimDefaults.vh")
  addResource("/vsrc/SimEmulator.v")
  addResource("/csrc/SimEmulator.cc")
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import freechips.rocketchip.diplomacy.LazyModule
|
||||
import freechips.rocketchip.subsystem._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import freechips.rocketchip.tilelink._
|
||||
import radiance.core.{SIMTCoreKey, MemtraceCoreKey}
|
||||
|
||||
// TODO: possibly move to somewhere closer to CoalescingUnit
|
||||
// TODO: separate coalescer config from CanHaveMemtraceCore
|
||||
|
||||
@@ -10,25 +10,10 @@ import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
||||
import freechips.rocketchip.unittest._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import radiance.core.{SIMTCoreParams, SIMTCoreKey}
|
||||
|
||||
// TODO: find better place for these
|
||||
|
||||
case class SIMTCoreParams(
|
||||
nWarps: Int = 4, // # of warps in the core
|
||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
||||
// cache; relates to the LSU lanes
|
||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
||||
)
|
||||
case class MemtraceCoreParams(
|
||||
tracefilename: String = "undefined",
|
||||
traceHasSource: Boolean = false
|
||||
)
|
||||
case class CoalXbarParam()
|
||||
|
||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
||||
case object MemtraceCoreKey
|
||||
extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
||||
case object CoalescerKey
|
||||
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
||||
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
||||
|
||||
@@ -12,6 +12,7 @@ import freechips.rocketchip.subsystem._
|
||||
import gemmini._
|
||||
import gemmini.Arithmetic.FloatArithmetic._
|
||||
import radiance.tile._
|
||||
import radiance.core._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||
|
||||
@@ -106,6 +107,44 @@ class WithRadianceCores(
|
||||
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
||||
}
|
||||
|
||||
/** Config fragment that attaches `n` emulator tiles to the subsystem.
  *
  * The new tiles are prepended to `TilesLocated(InSubsystem)` with tile IDs
  * `up(NumTiles) .. up(NumTiles) + n - 1`.  `NumTiles` and `NumRadianceCores`
  * are advanced by `n` so that fragments applied afterwards start numbering
  * after the tiles added here, instead of reusing their IDs when `n > 1`.
  *
  * @param n          number of emulator tiles to add
  * @param useVxCache forwarded to [[EmulatorTileParams]]
  */
class WithEmulatorCores(
    n: Int,
    useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val emulator = EmulatorTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => EmulatorTileAttachParams(
      emulator.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // One count per tile actually attached above.
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||
|
||||
/** Config fragment that attaches `n` fuzzer tiles to the subsystem.
  *
  * The new tiles are prepended to `TilesLocated(InSubsystem)` with tile IDs
  * `up(NumTiles) .. up(NumTiles) + n - 1`.  `NumTiles` and `NumRadianceCores`
  * are advanced by `n` so that fragments applied afterwards start numbering
  * after the tiles added here, instead of reusing their IDs when `n > 1`.
  *
  * @param n          number of fuzzer tiles to add
  * @param useVxCache forwarded to `FuzzerTileParams`
  */
class WithFuzzerCores(
    n: Int,
    useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val fuzzer = FuzzerTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => FuzzerTileAttachParams(
      fuzzer.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // One count per tile actually attached above.
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||
|
||||
object RadianceGemminiDataType extends Enumeration {
|
||||
type Type = Value
|
||||
val FP32, FP16, BF16, Int8 = Value
|
||||
@@ -244,25 +283,6 @@ class WithRadianceFrameBuffer(baseAddress: BigInt,
|
||||
}
|
||||
})
|
||||
|
||||
class WithFuzzerCores(
|
||||
n: Int,
|
||||
useVxCache: Boolean
|
||||
) extends Config((site, _, up) => {
|
||||
case TilesLocated(InSubsystem) => {
|
||||
val prev = up(TilesLocated(InSubsystem))
|
||||
val idOffset = up(NumTiles)
|
||||
val fuzzer = FuzzerTileParams(
|
||||
core = VortexCoreParams(),
|
||||
useVxCache = useVxCache)
|
||||
List.tabulate(n)(i => FuzzerTileAttachParams(
|
||||
fuzzer.copy(tileId = i + idOffset),
|
||||
RocketCrossingParams()
|
||||
)) ++ prev
|
||||
}
|
||||
case NumTiles => up(NumTiles) + 1
|
||||
case NumRadianceCores => up(NumRadianceCores) + 1
|
||||
})
|
||||
|
||||
class WithRadianceCluster(
|
||||
clusterId: Int,
|
||||
location: HierarchicalLocation = InSubsystem,
|
||||
|
||||
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
@@ -0,0 +1,96 @@
|
||||
// See LICENSE.SiFive for license details.
|
||||
// See LICENSE.Berkeley for license details.
|
||||
|
||||
package radiance.tile
|
||||
|
||||
import chisel3._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||
import freechips.rocketchip.resources.SimpleDevice
|
||||
import freechips.rocketchip.prci.ClockCrossingType
|
||||
import freechips.rocketchip.rocket._
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||
import radiance.core._
|
||||
import radiance.memory.{CoalescingUnit, CoalescerKey}
|
||||
|
||||
// TODO: De-duplicate between this and FuzzerTile
|
||||
|
||||
/** Tile parameters for an [[EmulatorTile]].
  *
  * The tile has no caches, BTB or blocker control address; every such
  * optional field is None.
  *
  * @param core       core parameters (marked for removal in the original)
  * @param useVxCache not read inside this class -- TODO confirm its consumer
  * @param tileId     tile ID, also used as the suffix of `uniqueName`
  */
case class EmulatorTileParams(
    core: VortexCoreParams = VortexCoreParams(), // TODO: remove this
    useVxCache: Boolean = false,
    tileId: Int = 0,
) extends InstantiableTileParams[EmulatorTile] {
  val baseName = "radiance_emulator_tile"
  val uniqueName = s"${baseName}_$tileId"

  val clockSinkParams = ClockSinkParameters()

  // Nothing optional is present on this tile.
  val blockerCtrlAddr = None
  val icache = None
  val dcache = None
  val btb = None

  /** Builds the tile described by these parameters. */
  def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(
      implicit p: Parameters
  ): EmulatorTile =
    new EmulatorTile(this, crossing, lookup)
}
|
||||
|
||||
/** Pairs an [[EmulatorTileParams]] with its crossing parameters so the tile
  * can be attached through `CanAttachTile`.
  */
case class EmulatorTileAttachParams(
    tileParams: EmulatorTileParams,
    crossingParams: HierarchicalElementCrossingParamsLike
) extends CanAttachTile {
  type TileType = EmulatorTile
}
|
||||
|
||||
/** Tile that hosts the DPI-backed [[Emulator]] as its memory traffic source.
  *
  * Lane count and per-lane source-ID count come from `SIMTCoreKey`, which
  * must be defined.  When `CoalescerKey` is defined, a `CoalescingUnit` is
  * placed between the emulator and the tile's master node; otherwise the
  * emulator node is connected directly.
  */
class EmulatorTile private (
    val EmulatorParams: EmulatorTileParams,
    crossing: ClockCrossingType,
    lookup: LookupByHartIdImpl,
    q: Parameters
) extends BaseTile(EmulatorParams, crossing, lookup, q)
    with SinksExternalInterrupts
    with SourcesExternalNotifications {

  /** Public constructor: takes the crossing parameters and extracts the
    * crossing type for the primary constructor.
    */
  def this(
      params: EmulatorTileParams,
      crossing: HierarchicalElementCrossingParamsLike,
      lookup: LookupByHartIdImpl
  )(implicit p: Parameters) =
    this(params, crossing.crossingType, lookup, p)

  val cpuDevice: SimpleDevice = new SimpleDevice("emulator", Nil)

  val intOutwardNode = None
  val slaveNode: TLInwardNode = TLIdentityNode()
  val masterNode = visibilityNode
  // val statusNode = BundleBridgeSource(() => new GroundTestStatus)

  // SIMTCoreKey is mandatory; elaboration fails here when it is missing.
  val (numLanes, numSrcIds) = {
    val simtCore = p(SIMTCoreKey)
    require(simtCore.isDefined, "emulator requires SIMTCoreKey to be defined")
    simtCore.map(param => (param.nMemLanes, param.nSrcIds)).getOrElse((0, 0))
  }
  // FIXME: parameterize
  val wordSizeInBytes = 4

  val emulator = LazyModule(new Emulator(numLanes, numSrcIds, wordSizeInBytes))

  // Conditionally instantiate memory coalescer
  val coalescerNode = p(CoalescerKey) match {
    case None => emulator.node
    case Some(coalParam) =>
      val coal = LazyModule(new CoalescingUnit(coalParam))
      coal.cpuNode :=* TLWidthWidget(4) :=* emulator.node
      coal.aggregateNode
  }

  masterNode :=* coalescerNode

  override lazy val module = new EmulatorTileModuleImp(this)
}
|
||||
|
||||
/** Module implementation of [[EmulatorTile]]: reports cease using the
  * emulator's `finished` output.
  */
class EmulatorTileModuleImp(outer: EmulatorTile) extends BaseTileModuleImp(outer) {
  private val emulatorFinished = outer.emulator.module.io.finished
  outer.reportCease(Some(emulatorFinished))
}
|
||||
@@ -13,6 +13,7 @@ import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||
import radiance.core.{SIMTCoreKey}
|
||||
import radiance.memory._
|
||||
|
||||
case class FuzzerTileParams(
|
||||
|
||||
@@ -19,6 +19,7 @@ import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
import org.chipsalliance.cde.config._
|
||||
import radiance.core._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user