diff --git a/src/main/resources/csrc/SimEmulator.cc b/src/main/resources/csrc/SimEmulator.cc
new file mode 100644
index 0000000..af454d5
--- /dev/null
+++ b/src/main/resources/csrc/SimEmulator.cc
@@ -0,0 +1,31 @@
+#ifndef NO_VPI
+#include <vpi_user.h> // NOTE(review): the include targets were missing in this copy of
+#include <svdpi.h>    // the patch; restored by convention for VPI/DPI shims -- confirm
+#endif
+#include <cstdint> // uint8_t (NOTE(review): assumed header name -- confirm)
+
+extern "C" void emulator_init_rs(int num_lanes); // defined outside this file
+
+extern "C" void emulator_generate_rs(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
+                                     long long *vec_a_address,
+                                     uint8_t *vec_a_is_store, int *vec_a_size,
+                                     long long *vec_a_data, uint8_t *vec_d_ready,
+                                     uint8_t *vec_d_valid,
+                                     uint8_t *vec_d_is_store, int *vec_d_size,
+                                     uint8_t inflight, uint8_t *finished); // defined outside this file
+
+extern "C" void emulator_init(int num_lanes) { // DPI entry point imported by SimEmulator.v
+  emulator_init_rs(num_lanes);
+}
+
+extern "C" void emulator_generate(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
+                                  long long *vec_a_address,
+                                  uint8_t *vec_a_is_store, int *vec_a_size,
+                                  long long *vec_a_data, uint8_t *vec_d_ready,
+                                  uint8_t *vec_d_valid, uint8_t *vec_d_is_store,
+                                  int *vec_d_size, uint8_t inflight,
+                                  uint8_t *finished) { // DPI entry point; forwards every argument unchanged
+  emulator_generate_rs(vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store,
+                       vec_a_size, vec_a_data, vec_d_ready, vec_d_valid,
+                       vec_d_is_store, vec_d_size, inflight, finished);
+}
diff --git a/src/main/resources/vsrc/SimEmulator.v b/src/main/resources/vsrc/SimEmulator.v
new file mode 100644
index 0000000..9b60316
--- /dev/null
+++ b/src/main/resources/vsrc/SimEmulator.v
@@ -0,0 +1,132 @@
+`include "SimDefaults.vh"
+
+import "DPI-C" function void emulator_init(
+  input int num_lanes // must match the C prototype's 'int' (DPI maps longint to 'long long')
+);
+
+// Make sure to sync the parameters for:
+// (1) import "DPI-C" declaration
+// (2) C function declaration
+// (3) DPI function calls inside initial/always blocks
+import "DPI-C" function void emulator_generate
+(
+  input bit vec_a_ready[`MAX_NUM_LANES],
+  output bit vec_a_valid[`MAX_NUM_LANES],
+  output longint vec_a_address[`MAX_NUM_LANES],
+  output bit vec_a_is_store[`MAX_NUM_LANES],
+  output int vec_a_size[`MAX_NUM_LANES],
+  output longint 
vec_a_data[`MAX_NUM_LANES],
+
+  output bit vec_d_ready[`MAX_NUM_LANES],
+  input bit vec_d_valid[`MAX_NUM_LANES],
+  input bit vec_d_is_store[`MAX_NUM_LANES],
+  input int vec_d_size[`MAX_NUM_LANES],
+
+  input bit inflight,
+  output bit finished
+);
+
+module SimEmulator #(parameter NUM_LANES = 4) (
+  input clock,
+  input reset,
+
+  input [NUM_LANES-1:0] a_ready,
+  output [NUM_LANES-1:0] a_valid,
+  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0] a_address, // NOTE(review): sized with the DATA width macro; confirm it equals the Chisel-side address width
+  output [NUM_LANES-1:0] a_is_store,
+  output [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] a_size,
+  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0] a_data,
+
+  output [NUM_LANES-1:0] d_ready,
+  input [NUM_LANES-1:0] d_valid,
+  input [NUM_LANES-1:0] d_is_store,
+  input [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] d_size,
+  // TODO: d_mask
+  // TODO: d_data
+
+  input inflight,
+  output finished
+);
+  // Naming: "__in_*" is written by C and read by Verilog; "__out_*" is written by Verilog and read by C.
+  // Unpacked ranges are ascending ([0:N-1]) so that element i lines up with C index i.
+  // The DPI import declares MAX_NUM_LANES-element arrays, so size to that rather than NUM_LANES.
+  bit __out_a_ready [0:`MAX_NUM_LANES-1];
+  bit __in_a_valid [0:`MAX_NUM_LANES-1];
+  longint __in_a_address [0:`MAX_NUM_LANES-1];
+  bit __in_a_is_store [0:`MAX_NUM_LANES-1];
+  int __in_a_size [0:`MAX_NUM_LANES-1];
+  longint __in_a_data [0:`MAX_NUM_LANES-1];
+  bit __in_d_ready [0:`MAX_NUM_LANES-1];
+  bit __out_d_valid [0:`MAX_NUM_LANES-1];
+  bit __out_d_is_store [0:`MAX_NUM_LANES-1];
+  int __out_d_size [0:`MAX_NUM_LANES-1];
+  bit __out_inflight;
+  bit __in_finished;
+
+  genvar g;
+  generate
+    for (g = 0; g < NUM_LANES; g = g + 1) begin // only the first NUM_LANES array entries are wired to ports
+      assign __out_a_ready[g] = a_ready[g];
+      assign a_valid[g] = __in_a_valid[g];
+      assign a_address[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH] // lane g's slice of the flattened bus
+        = __in_a_address[g][`SIMMEM_DATA_WIDTH-1:0];
+      assign a_is_store[g] = __in_a_is_store[g];
+      assign a_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH]
+        = __in_a_size[g][`SIMMEM_LOGSIZE_WIDTH-1:0]; // upper bits of the 32-bit int are dropped
+      assign a_data[`SIMMEM_DATA_WIDTH*g +: `SIMMEM_DATA_WIDTH]
+        = __in_a_data[g][`SIMMEM_DATA_WIDTH-1:0];
+      assign 
d_ready[g] = __in_d_ready[g];
+      assign __out_d_valid[g] = d_valid[g];
+      assign __out_d_is_store[g] = d_is_store[g];
+      assign __out_d_size[g] = d_size[`SIMMEM_LOGSIZE_WIDTH*g +: `SIMMEM_LOGSIZE_WIDTH];
+    end
+    assign __out_inflight = inflight;
+  endgenerate
+  assign finished = __in_finished;
+
+  initial begin
+    emulator_init(NUM_LANES); // one-time DPI setup at the start of simulation
+  end
+
+  // negedge is important here: the DPI call effectively acts as combinational
+  // logic, so the values it produces must be visible in the *current* cycle
+  // rather than the next one.
+  always @(negedge clock) begin
+    if (reset) begin
+      for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin // clears only the lanes that are wired to ports
+        __in_a_valid[tid] = 1'b0;
+        __in_a_address[tid] = `SIMMEM_DATA_WIDTH'b0;
+        __in_a_is_store[tid] = 1'b0;
+        __in_a_size[tid] = 32'b0;
+        __in_a_data[tid] = `SIMMEM_DATA_WIDTH'b0;
+        __in_d_ready[tid] = 1'b0;
+      end
+      __in_finished = 1'b0;
+    end else begin
+      emulator_generate( // argument order must match the import "DPI-C" declaration
+        __out_a_ready,
+        __in_a_valid,
+        __in_a_address,
+        __in_a_is_store,
+        __in_a_size,
+        __in_a_data,
+
+        __in_d_ready,
+        __out_d_valid,
+        __out_d_is_store,
+        __out_d_size,
+
+        __out_inflight,
+        __in_finished
+      );
+      for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin // debug trace: one line per lane on every non-reset negedge
+        $display("verilog: %04d a_valid[%d]=%d, a_address[%d]=0x%x, d_ready[%d]=%d",
+          $time, tid, __in_a_valid[tid], tid, __in_a_address[tid], tid, __in_d_ready[tid]);
+      end
+
+      if (finished) begin // finished mirrors __in_finished, which the DPI call writes
+        $finish;
+      end
+    end
+  end
+endmodule
diff --git a/src/main/scala/radiance/core/Emulator.scala b/src/main/scala/radiance/core/Emulator.scala
new file mode 100644
index 0000000..1afe2d7
--- /dev/null
+++ b/src/main/scala/radiance/core/Emulator.scala
@@ -0,0 +1,243 @@
+package radiance.core
+
+import chisel3._
+import chisel3.util._
+import org.chipsalliance.cde.config.{Field, Parameters}
+import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
+import 
radiance.memory.{SourceGenerator, TraceLine, TLPrintf}
+
+case class SIMTCoreParams(
+    nWarps: Int = 4, // # of warps in the core
+    nCoreLanes: Int = 4, // # of SIMT threads in the core
+    nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
+                        // cache; relates to the LSU lanes
+    nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
+)
+case class MemtraceCoreParams(
+    tracefilename: String = "undefined",
+    traceHasSource: Boolean = false
+)
+
+case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
+case object MemtraceCoreKey
+    extends Field[Option[MemtraceCoreParams]](None /*default*/ )
+
+// #############################################################################
+// FIXME: copy-paste from MemFuzzer
+// #############################################################################
+
+class Emulator(
+    numLanes: Int,
+    numSrcIds: Int,
+    wordSizeInBytes: Int,
+)(implicit p: Parameters)
+    extends LazyModule {
+  val laneNodes = Seq.tabulate(numLanes) { i => // one TileLink client node per lane
+    val clientParam = Seq(
+      TLMasterParameters.v1(
+        name = "Emulator" + i.toString,
+        sourceId = IdRange(0, numSrcIds)
+        // visibility = Seq(AddressSet(0x0000, 0xffffff))
+      )
+    )
+    TLClientNode(Seq(TLMasterPortParameters.v1(clientParam)))
+  }
+
+  val node = TLIdentityNode() // single attachment point that fans in every lane node
+  laneNodes.foreach(node := _)
+
+  lazy val module = new EmulatorImp(this, numLanes, numSrcIds, wordSizeInBytes)
+}
+
+class EmulatorImp(
+    outer: Emulator,
+    numLanes : Int,
+    numSrcIds: Int,
+    wordSizeInBytes: Int,
+) extends LazyModuleImp(outer) {
+  val io = IO(new Bundle {
+    val finished = Output(Bool())
+  })
+  val sim = Module(new SimEmulator(numLanes))
+  sim.io.clock := clock
+  sim.io.reset := reset.asBool
+
+  // NOTE: sim.io.a.ready is driven exactly once, further down, from
+  // laneReqs(i).ready, which also gates on source-ID availability.  An earlier
+  // driver placed here (straight from tlOut.a.ready) was dead under Chisel's
+  // last-connect semantics and hid that back-pressure term from readers.
+
+  io.finished := sim.io.finished
+
+  // connect Verilog <-> Chisel IO
+  // Verilog IO flattened across all lanes
+  val laneReqs = Wire(Vec(numLanes, 
Decoupled(new TraceLine)))
+  val addrW = laneReqs(0).bits.address.getWidth
+  val sizeW = laneReqs(0).bits.size.getWidth
+  val dataW = laneReqs(0).bits.data.getWidth
+  laneReqs.zipWithIndex.foreach { case (req, i) =>
+    req.valid := sim.io.a.valid(i)
+    req.bits.source := 0.U // the DPI interface does not carry a source id
+    req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i) // lane i's slice of the flattened bus
+    req.bits.is_store := sim.io.a.is_store(i)
+    req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
+    req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
+  }
+  sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt
+
+  val laneResps = Wire(Vec(numLanes, Flipped(Decoupled(new TraceLine))))
+  laneResps.zipWithIndex.foreach { case (resp, i) =>
+    resp.ready := sim.io.d.ready(i)
+    // TODO: source/address/data of a response are not passed to the DPI side
+    resp.bits.source := DontCare
+    resp.bits.address := DontCare
+    resp.bits.data := DontCare
+  }
+  sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
+  sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
+  sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt
+
+  val sourceGens = Seq.fill(numLanes)( // one source-ID allocator per lane
+    Module(
+      new SourceGenerator(
+        log2Ceil(numSrcIds),
+        ignoreInUse = false
+      )
+    )
+  )
+  val anyInflight = sourceGens.map(_.io.inflight).reduce(_ || _)
+  sim.io.inflight := anyInflight
+
+  // For each lane, turn the DPI request into a TL request and hand the response back
+  (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
+    case ((node, (req, resp)), lane) =>
+      val (tlOut, edge) = node.out(0)
+
+      // Requests --------------------------------------------------------------
+      //
+      // The core only issues accesses of at least word granularity, so we want
+      // this driver to do the same.
+      // That means if req.size is smaller than the word size, the data is shifted
+      // into a word-size request (the explicit mask logic below is commented out).
+      val offsetInWord = req.bits.address % wordSizeInBytes.U
+      val subword = req.bits.size < log2Ceil(wordSizeInBytes).U
+
+      // `mask` is currently unused
+      // val mask = Wire(UInt(wordSizeInBytes.W))
+      val wordData = Wire(UInt((wordSizeInBytes * 8 * 2).W))
+      val sizeInBytes = Wire(UInt((sizeW + 1).W)) // only referenced by the commented-out mask below
+      sizeInBytes := (1.U) << req.bits.size
+      // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
+      wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
+      val wordAlignedAddress =
+        req.bits.address & ~((1 << log2Ceil(wordSizeInBytes)) - 1).U(addrW.W)
+      val wordAlignedSize = Mux(subword, log2Ceil(wordSizeInBytes).U, req.bits.size) // widen sub-word accesses to exactly one word
+
+      val sourceGen = sourceGens(lane)
+      sourceGen.io.gen := tlOut.a.fire
+      sourceGen.io.reclaim.valid := tlOut.d.fire
+      sourceGen.io.reclaim.bits := tlOut.d.bits.source
+      sourceGen.io.meta := DontCare
+
+      val (plegal, pbits) = edge.Put(
+        fromSource = sourceGen.io.id.bits,
+        toAddress = wordAlignedAddress,
+        lgSize = wordAlignedSize, // trace line already holds log2(size)
+        // data should be aligned to beatBytes
+        data =
+          (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
+      )
+      val (glegal, gbits) = edge.Get(
+        fromSource = sourceGen.io.id.bits,
+        toAddress = wordAlignedAddress,
+        lgSize = wordAlignedSize
+      )
+      val legal = Mux(req.bits.is_store, plegal, glegal)
+      val bits = Mux(req.bits.is_store, pbits, gbits)
+
+      tlOut.a.valid := req.valid && sourceGen.io.id.valid // hold the request until a source ID is available
+      req.ready := tlOut.a.ready && sourceGen.io.id.valid
+
+      when(tlOut.a.fire) {
+        assert(legal, "illegal TL req gen")
+      }
+      tlOut.a.bits := bits
+
+      // Responses -------------------------------------------------------------
+      //
+      tlOut.d.ready := resp.ready
+      resp.valid := tlOut.d.valid
+      resp.bits.is_store := !edge.hasData(tlOut.d.bits) // a response without data is treated as a store ack
+      resp.bits.size := tlOut.d.bits.size
+
+      tlOut.b.ready := true.B
+      tlOut.c.valid := false.B
+      tlOut.e.valid := false.B
+
+      // debug
+      dontTouch(req)
+      when(tlOut.a.valid) {
+        printf(s"Lane 
${lane}: ");
+        TLPrintf(
+          "Emulator",
+          tlOut.a.bits.source,
+          tlOut.a.bits.address,
+          tlOut.a.bits.size,
+          tlOut.a.bits.mask,
+          req.bits.is_store,
+          tlOut.a.bits.data,
+          req.bits.data
+        )
+      }
+      dontTouch(tlOut.a)
+      dontTouch(tlOut.d)
+  }
+
+  // when(traceFinished && allReqReclaimed && noValidReqs) {
+  //   assert(
+  //     false.B,
+  //     "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
+  //   )
+  // }
+}
+
+class SimEmulator(numLanes: Int)
+    extends BlackBox(Map("NUM_LANES" -> numLanes)) // passed as the Verilog parameter NUM_LANES
+    with HasBlackBoxResource {
+  val traceLineT = new TraceLine // used here only to read field widths
+  val addrW = traceLineT.address.getWidth
+  val sizeW = traceLineT.size.getWidth
+  val dataW = traceLineT.data.getWidth
+  val io = IO(new Bundle {
+    val clock = Input(Clock())
+    val reset = Input(Bool())
+    val inflight = Input(Bool())
+    val finished = Output(Bool())
+
+    val a =
+      new Bundle {
+        val ready = Input(UInt(numLanes.W))
+        val valid = Output(UInt(numLanes.W))
+        // Chisel can't interface with a Verilog 2D port, so all lanes are
+        // flattened into a single wide 1D vector.
+        val address = Output(UInt((addrW * numLanes).W)) // lane i occupies bits [addrW*(i+1)-1 : addrW*i]
+        val is_store = Output(UInt(numLanes.W))
+        val size = Output(UInt((sizeW * numLanes).W))
+        val data = Output(UInt((dataW * numLanes).W))
+      }
+    val d =
+      new Bundle {
+        val ready = Output(UInt(numLanes.W))
+        val valid = Input(UInt(numLanes.W))
+        val is_store = Input(UInt(numLanes.W))
+        val size = Input(UInt((sizeW * numLanes).W))
+      }
+  })
+
+  addResource("/vsrc/SimDefaults.vh") // included by SimEmulator.v
+  addResource("/vsrc/SimEmulator.v")
+  addResource("/csrc/SimEmulator.cc") // C definitions of the DPI functions imported by SimEmulator.v
+}
+
diff --git a/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala b/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala
index 0801071..658db38 100644
--- a/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala
+++ b/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala
@@ -4,6 +4,7 @@ import freechips.rocketchip.diplomacy.LazyModule
 import freechips.rocketchip.subsystem._
 import org.chipsalliance.cde.config.Parameters
 import freechips.rocketchip.tilelink._
+import radiance.core.{SIMTCoreKey, MemtraceCoreKey}
 
 // TODO: possibly move to somewhere closer to CoalescingUnit
 // TODO: separate coalescer config from CanHaveMemtraceCore
diff --git a/src/main/scala/radiance/memory/Coalescing.scala b/src/main/scala/radiance/memory/Coalescing.scala
index aafe29c..5f24cc3 100644
--- a/src/main/scala/radiance/memory/Coalescing.scala
+++ b/src/main/scala/radiance/memory/Coalescing.scala
@@ -10,25 +10,10 @@ import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
 import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
 import freechips.rocketchip.unittest._
 import freechips.rocketchip.tilelink._
+import radiance.core.{SIMTCoreParams, SIMTCoreKey}
 
-// TODO: find better place for these
-
-case class SIMTCoreParams(
-    nWarps: Int = 4, // # of warps in the core
-    nCoreLanes: Int = 4, // # of SIMT threads in the core
-    nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
- nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes -) -case class MemtraceCoreParams( - tracefilename: String = "undefined", - traceHasSource: Boolean = false -) case class CoalXbarParam() -case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ ) -case object MemtraceCoreKey - extends Field[Option[MemtraceCoreParams]](None /*default*/ ) case object CoalescerKey extends Field[Option[CoalescerConfig]](None /*default*/ ) case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ ) diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index 4a3a940..522fb48 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -12,6 +12,7 @@ import freechips.rocketchip.subsystem._ import gemmini._ import gemmini.Arithmetic.FloatArithmetic._ import radiance.tile._ +import radiance.core._ import radiance.memory._ import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8} @@ -106,6 +107,44 @@ class WithRadianceCores( ), tensorCoreFP16, tensorCoreDecoupled, useVxCache) } +class WithEmulatorCores( + n: Int, + useVxCache: Boolean +) extends Config((site, _, up) => { + case TilesLocated(InSubsystem) => { + val prev = up(TilesLocated(InSubsystem)) + val idOffset = up(NumTiles) + val emulator = EmulatorTileParams( + core = VortexCoreParams(), + useVxCache = useVxCache) + List.tabulate(n)(i => EmulatorTileAttachParams( + emulator.copy(tileId = i + idOffset), + RocketCrossingParams() + )) ++ prev + } + case NumTiles => up(NumTiles) + 1 + case NumRadianceCores => up(NumRadianceCores) + 1 +}) + +class WithFuzzerCores( + n: Int, + useVxCache: Boolean +) extends Config((site, _, up) => { + case TilesLocated(InSubsystem) => { + val prev = up(TilesLocated(InSubsystem)) + val idOffset = up(NumTiles) + val fuzzer = FuzzerTileParams( + core = VortexCoreParams(), + useVxCache = useVxCache) + 
List.tabulate(n)(i => FuzzerTileAttachParams(
+      fuzzer.copy(tileId = i + idOffset),
+      RocketCrossingParams()
+    )) ++ prev
+  }
+  case NumTiles => up(NumTiles) + n // n tile IDs are allocated from up(NumTiles); + 1 collides for n > 1
+  case NumRadianceCores => up(NumRadianceCores) + 1 // NOTE(review): probably should be + n as well -- confirm
+})
+
 object RadianceGemminiDataType extends Enumeration {
   type Type = Value
   val FP32, FP16, BF16, Int8 = Value
@@ -244,25 +283,6 @@ class WithRadianceFrameBuffer(baseAddress: BigInt,
   }
 })
 
-class WithFuzzerCores(
-  n: Int,
-  useVxCache: Boolean
-) extends Config((site, _, up) => {
-  case TilesLocated(InSubsystem) => {
-    val prev = up(TilesLocated(InSubsystem))
-    val idOffset = up(NumTiles)
-    val fuzzer = FuzzerTileParams(
-      core = VortexCoreParams(),
-      useVxCache = useVxCache)
-    List.tabulate(n)(i => FuzzerTileAttachParams(
-      fuzzer.copy(tileId = i + idOffset),
-      RocketCrossingParams()
-    )) ++ prev
-  }
-  case NumTiles => up(NumTiles) + 1
-  case NumRadianceCores => up(NumRadianceCores) + 1
-})
-
 class WithRadianceCluster(
   clusterId: Int,
   location: HierarchicalLocation = InSubsystem,
diff --git a/src/main/scala/radiance/tile/EmulatorTile.scala b/src/main/scala/radiance/tile/EmulatorTile.scala
new file mode 100644
index 0000000..d6881ca
--- /dev/null
+++ b/src/main/scala/radiance/tile/EmulatorTile.scala
@@ -0,0 +1,96 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package radiance.tile
+
+import chisel3._
+import org.chipsalliance.cde.config.Parameters
+import org.chipsalliance.diplomacy.lazymodule.LazyModule
+import freechips.rocketchip.resources.SimpleDevice
+import freechips.rocketchip.prci.ClockCrossingType
+import freechips.rocketchip.rocket._
+import freechips.rocketchip.tile._
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
+import freechips.rocketchip.prci.{ClockSinkParameters}
+import radiance.core._
+import radiance.memory.{CoalescingUnit, CoalescerKey}
+
+// TODO: De-duplicate between this and FuzzerTile
+
+case class EmulatorTileParams(
+    core: VortexCoreParams = VortexCoreParams(), // TODO: remove this
+    useVxCache: Boolean = false,
+    tileId: Int = 0,
+) extends InstantiableTileParams[EmulatorTile] {
+  def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(
+      implicit p: Parameters
+  ): EmulatorTile = {
+    new EmulatorTile(this, crossing, lookup)
+  }
+  val clockSinkParams = ClockSinkParameters()
+  val blockerCtrlAddr = None
+  val icache = None // no cache or BTB parameters are declared for this tile
+  val dcache = None
+  val btb = None
+  val baseName = "radiance_emulator_tile"
+  val uniqueName = s"${baseName}_$tileId"
+}
+
+case class EmulatorTileAttachParams(
+    tileParams: EmulatorTileParams,
+    crossingParams: HierarchicalElementCrossingParamsLike
+) extends CanAttachTile { type TileType = EmulatorTile }
+
+class EmulatorTile private ( // primary constructor is private; instances are built via the auxiliary constructor below
+    val EmulatorParams: EmulatorTileParams,
+    crossing: ClockCrossingType,
+    lookup: LookupByHartIdImpl,
+    q: Parameters
+) extends BaseTile(EmulatorParams, crossing, lookup, q)
+    with SinksExternalInterrupts
+    with SourcesExternalNotifications {
+  def this(
+      params: EmulatorTileParams,
+      crossing: HierarchicalElementCrossingParamsLike,
+      lookup: LookupByHartIdImpl
+  )(implicit p: Parameters) =
+    this(params, crossing.crossingType, lookup, p)
+
+  val cpuDevice: SimpleDevice = new SimpleDevice("emulator", Nil)
+
+  
val intOutwardNode = None
+  val slaveNode: TLInwardNode = TLIdentityNode()
+  val masterNode = visibilityNode
+  // val statusNode = BundleBridgeSource(() => new GroundTestStatus)
+
+  val (numLanes, numSrcIds) = p(SIMTCoreKey) match {
+    case Some(param) => (param.nMemLanes, param.nSrcIds)
+    case None => {
+      require(false, "emulator requires SIMTCoreKey to be defined")
+      (0, 0) // unreachable; only present so both branches have the same type
+    }
+  }
+  // FIXME: parameterize
+  val wordSizeInBytes = 4
+
+  val emulator = LazyModule(new Emulator(numLanes, numSrcIds, wordSizeInBytes))
+
+  // Conditionally instantiate memory coalescer
+  val coalescerNode = p(CoalescerKey) match {
+    case Some(coalParam) => {
+      val coal = LazyModule(new CoalescingUnit(coalParam))
+      coal.cpuNode :=* TLWidthWidget(wordSizeInBytes) :=* emulator.node // was a literal 4; keep in step with the emulator's word size
+      coal.aggregateNode
+    }
+    case None => emulator.node
+  }
+
+  masterNode :=* coalescerNode
+
+  override lazy val module = new EmulatorTileModuleImp(this)
+}
+
+class EmulatorTileModuleImp(outer: EmulatorTile) extends BaseTileModuleImp(outer) {
+  outer.reportCease(Some(outer.emulator.module.io.finished)) // passes the emulator's finished flag to reportCease
+}
diff --git a/src/main/scala/radiance/tile/FuzzerTile.scala b/src/main/scala/radiance/tile/FuzzerTile.scala
index 730f04a..5e17672 100644
--- a/src/main/scala/radiance/tile/FuzzerTile.scala
+++ b/src/main/scala/radiance/tile/FuzzerTile.scala
@@ -13,6 +13,7 @@ import freechips.rocketchip.tile._
 import freechips.rocketchip.tilelink._
 import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
 import freechips.rocketchip.prci.{ClockSinkParameters}
+import radiance.core.{SIMTCoreKey}
 import radiance.memory._
 
 case class FuzzerTileParams(
diff --git a/src/main/scala/radiance/tile/RadianceTile.scala b/src/main/scala/radiance/tile/RadianceTile.scala
index 202543a..f4e4165 100644
--- a/src/main/scala/radiance/tile/RadianceTile.scala
+++ b/src/main/scala/radiance/tile/RadianceTile.scala
@@ -19,6 +19,7 @@ import freechips.rocketchip.tilelink._
 import freechips.rocketchip.util._
 import 
midas.targetutils.SynthesizePrintf import org.chipsalliance.cde.config._ +import radiance.core._ import radiance.memory._ import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}