From a0bff40a872a7792b0cf0a868344d3718ea468f8 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 11 Jun 2024 16:17:08 -0700 Subject: [PATCH 1/3] Set correct static tileId for core and Gemmini tiles Otherwise, in multiple cluster configs, tiles can have duplicate global tileId which results in Diplomacy connection errors for interrupt nodes (among other things). --- src/main/scala/radiance/subsystem/Configs.scala | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index 9ef551c..a64952e 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -43,7 +43,7 @@ class WithRadianceCores( ) extends Config((site, _, up) => { case TilesLocated(`location`) => { val prev = up(TilesLocated(`location`)) - val idOffset = prev.size + val idOffset = up(NumTiles) val vortex = RadianceTileParams( core = VortexCoreParams(fpu = None), btb = None, @@ -72,6 +72,7 @@ class WithRadianceCores( crossing )) ++ prev } + case NumTiles => up(NumTiles) + n }) { def this(n: Int, location: HierarchicalLocation = InSubsystem, useVxCache: Boolean = false) = this(n, location, RocketCrossingParams( master = HierarchicalElementMasterPortParams.locationDefault(location), @@ -88,8 +89,9 @@ class WithRadianceGemmini(location: HierarchicalLocation, dim: Int, accSizeInKB: Int, tileSize: Int) extends Config((site, _, up) => { case TilesLocated(`location`) => { val prev = up(TilesLocated(`location`)) - val idOffset = prev.size + val idOffset = up(NumTiles) if (idOffset == 0) { + // FIXME: this doesn't work for multiple clusters when idOffset may not be 0 println("******WARNING****** gemmini tile id is 0! 
radiance tiles in the same cluster needs to be before gemmini") } val smKey = site(RadianceSharedMemKey).get @@ -124,6 +126,7 @@ class WithRadianceGemmini(location: HierarchicalLocation, slaveAddress = smKey.address + smKey.size + 0x3000 )) } + case NumTiles => up(NumTiles) + 1 }) { def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int, tileSize: Int) = this(location, RocketCrossingParams( @@ -172,7 +175,7 @@ class WithFuzzerCores( ) extends Config((site, _, up) => { case TilesLocated(InSubsystem) => { val prev = up(TilesLocated(InSubsystem)) - val idOffset = prev.size + val idOffset = up(NumTiles) val fuzzer = FuzzerTileParams( core = VortexCoreParams(fpu = None), useVxCache = useVxCache) @@ -181,6 +184,7 @@ class WithFuzzerCores( RocketCrossingParams() )) ++ prev } + case NumTiles => up(NumTiles) + 1 }) class WithRadianceCluster( @@ -288,7 +292,7 @@ class WithNCustomSmallRocketCores( ) extends Config((site, here, up) => { case TilesLocated(InSubsystem) => { val prev = up(TilesLocated(InSubsystem)) - val idOffset = overrideIdOffset.getOrElse(prev.size) + val idOffset = up(NumTiles) val med = RocketTileParams( core = RocketCoreParams(fpu = None), btb = None, @@ -316,6 +320,7 @@ class WithNCustomSmallRocketCores( crossing )) ++ prev } + case NumTiles => up(NumTiles) + n }) class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16), From 7ced63bd62ab4f596ced4b0b2d20867a63777853 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 11 Jun 2024 16:23:07 -0700 Subject: [PATCH 2/3] Remove clbus definition from RadianceCluster Should be in conjunction with the rocket-chip change that defines clbus in the base Cluster class. 
--- src/main/scala/radiance/tile/RadianceCluster.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/scala/radiance/tile/RadianceCluster.scala b/src/main/scala/radiance/tile/RadianceCluster.scala index 1040026..f8c868d 100644 --- a/src/main/scala/radiance/tile/RadianceCluster.scala +++ b/src/main/scala/radiance/tile/RadianceCluster.scala @@ -34,12 +34,6 @@ class RadianceCluster ( crossing: ClockCrossingType, lookup: LookupByClusterIdImpl )(implicit p: Parameters) extends Cluster(thisClusterParams, crossing, lookup) { - // cluster-local bus, used for shared memory traffic that never leaves the - // confines of a cluster - val clbus = tlBusWrapperLocationMap(CLBUS(clusterId)) - - clbus.clockGroupNode := allClockGroupsNode - // Instantiate cluster-local shared memory scratchpad // // Instantiate the same number of banks as there are lanes. From 1401c4a0908e8ec2e77109d8039d9a67024ff0d9 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Tue, 11 Jun 2024 17:13:51 -0700 Subject: [PATCH 3/3] Separate out core id from tile id in TileParams Create a new config key to distinguish number of cores from number of total tiles (which can be different when there are Gemmini tiles). It is important to give contiguous IDs for Vortex cores for the cluster-wide barrier to work. 
--- .../scala/radiance/subsystem/Configs.scala | 10 +++++++++- src/main/scala/radiance/tile/Barrier.scala | 2 +- .../scala/radiance/tile/RadianceCluster.scala | 13 +++++-------- .../scala/radiance/tile/RadianceTile.scala | 19 ++++++++++++------- src/main/scala/radiance/tile/VortexCore.scala | 7 +++---- 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index a64952e..8020f02 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -44,6 +44,7 @@ class WithRadianceCores( case TilesLocated(`location`) => { val prev = up(TilesLocated(`location`)) val idOffset = up(NumTiles) + val coreIdOffset = up(NumRadianceCores) val vortex = RadianceTileParams( core = VortexCoreParams(fpu = None), btb = None, @@ -68,11 +69,15 @@ class WithRadianceCores( nTLBSuperpages = 1, blockBytes = site(CacheBlockBytes)))) List.tabulate(n)(i => RadianceTileAttachParams( - vortex.copy(tileId = i + idOffset), + vortex.copy( + tileId = i + idOffset, + coreId = i + coreIdOffset, + ), crossing )) ++ prev } case NumTiles => up(NumTiles) + n + case NumRadianceCores => up(NumRadianceCores) + n }) { def this(n: Int, location: HierarchicalLocation = InSubsystem, useVxCache: Boolean = false) = this(n, location, RocketCrossingParams( master = HierarchicalElementMasterPortParams.locationDefault(location), @@ -127,6 +132,8 @@ class WithRadianceGemmini(location: HierarchicalLocation, )) } case NumTiles => up(NumTiles) + 1 + // don't increment core id for Gemmini tiles + case NumRadianceCores => up(NumRadianceCores) }) { def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int, tileSize: Int) = this(location, RocketCrossingParams( @@ -185,6 +192,7 @@ class WithFuzzerCores( )) ++ prev } case NumTiles => up(NumTiles) + 1 + case NumRadianceCores => up(NumRadianceCores) + 1 }) class WithRadianceCluster( diff --git 
a/src/main/scala/radiance/tile/Barrier.scala b/src/main/scala/radiance/tile/Barrier.scala index 269a62c..02ae4fd 100644 --- a/src/main/scala/radiance/tile/Barrier.scala +++ b/src/main/scala/radiance/tile/Barrier.scala @@ -73,7 +73,7 @@ class BarrierSynchronizer( ) extends Module { val numBarriers = 1 << param.barrierIdBits val numCores = 1 << param.numCoreBits - println(s"numBarriers: ${numBarriers}, numCores: ${numCores}") + println(s"======== numBarriers: ${numBarriers}, numCores: ${numCores}") val io = IO(new Bundle { val reqs = Vec(numCores, Flipped(Decoupled(new BarrierRequestBits(param)))) diff --git a/src/main/scala/radiance/tile/RadianceCluster.scala b/src/main/scala/radiance/tile/RadianceCluster.scala index f8c868d..7ee7025 100644 --- a/src/main/scala/radiance/tile/RadianceCluster.scala +++ b/src/main/scala/radiance/tile/RadianceCluster.scala @@ -51,7 +51,7 @@ class RadianceCluster ( val radianceTiles = leafTiles.values.filter(_.isInstanceOf[RadianceTile]).toSeq.asInstanceOf[Seq[RadianceTile]] - val numCores = leafTiles.size - gemminis.size + val numCoresInCluster = leafTiles.size - gemminis.size // ************************************** // ______ _________ ___ @@ -324,7 +324,7 @@ class RadianceCluster ( // connect tile smem nodes to xbar, and xbar to banks // val smem_xbar = TLXbar() - val radianceAccSlaveNodes = Seq.fill(numCores)(AccSlaveNode()) + val radianceAccSlaveNodes = Seq.fill(numCoresInCluster)(AccSlaveNode()) (radianceAccSlaveNodes zip radianceTiles).foreach { case (a, r) => a := r.accMasterNode } val gemminiAccMasterNode = AccMasterNode() gemminiTile.accSlaveNode := gemminiAccMasterNode @@ -332,8 +332,8 @@ class RadianceCluster ( val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m) // printf and perf counter buffer - TLRAM(AddressSet(smem_key.address + smem_size, numCores * 0x200 - 1)) := traceTLNode := - TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode + TLRAM(AddressSet(smem_key.address + smem_size, numCoresInCluster * 
0x200 - 1)) := + traceTLNode := TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode p(RadianceFrameBufferKey).foreach { key => val fb = LazyModule(new FrameBuffer(key.baseAddress, key.width, key.size, key.validAddress, key.fbName)) @@ -341,7 +341,7 @@ class RadianceCluster ( } // Diplomacy sink nodes for cluster-wide barrier sync signal - val barrierSlaveNode = BarrierSlaveNode(numCores) + val barrierSlaveNode = BarrierSlaveNode(numCoresInCluster) // HACK: This is a workaround of the CanAttachTile bus connecting API that // works by downcasting tile and directly accessing the node inside that is @@ -371,7 +371,6 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( // cores are configured to have the same barrier id range. While true, might // be better to actually assert this val barrierParam = outer.barrierSlaveNode.in.head._2 - println(s"======= barrierParam: ${barrierParam}") val synchronizer = Module(new BarrierSynchronizer(barrierParam)) (synchronizer.io.reqs zip outer.barrierSlaveNode.in).foreach { case (req, (b, _)) => req <> b.req @@ -528,6 +527,4 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( } makeSmemBanks() - - println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}") } diff --git a/src/main/scala/radiance/tile/RadianceTile.scala b/src/main/scala/radiance/tile/RadianceTile.scala index 2a6325c..a05a045 100644 --- a/src/main/scala/radiance/tile/RadianceTile.scala +++ b/src/main/scala/radiance/tile/RadianceTile.scala @@ -21,6 +21,10 @@ import org.chipsalliance.cde.config._ import radiance.memory._ import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs} +/** For determining radiance core id. 
This may be different from + * RadianceTileParams.coreId, when a cluster contains non-core tiles */ +case object NumRadianceCores extends Field[Int](0) + case class RadianceTileParams( core: VortexCoreParams = VortexCoreParams(), useVxCache: Boolean = false, @@ -30,6 +34,7 @@ case class RadianceTileParams( dataScratchpadBytes: Int = 0, name: Option[String] = Some("radiance_tile"), tileId: Int = 0, + coreId: Int = 0, beuAddr: Option[BigInt] = None, blockerCtrlAddr: Option[BigInt] = None, clockSinkParams: ClockSinkParameters = ClockSinkParameters(), @@ -210,7 +215,7 @@ class RadianceTile private ( clients = Seq( TLMasterParameters.v1( sourceId = IdRange(0, 1 << imemSourceWidth), - name = s"Vortex Core ${radianceParams.tileId} I-Mem $i", + name = s"Vortex Core ${radianceParams.coreId} I-Mem $i", requestFifo = true, supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), @@ -229,7 +234,7 @@ class RadianceTile private ( clients = Seq( TLMasterParameters.v1( sourceId = IdRange(0, 1 << dmemSourceWidth), - name = s"Vortex Core ${radianceParams.tileId} D-Mem Lane $i", + name = s"Vortex Core ${radianceParams.coreId} D-Mem Lane $i", requestFifo = true, supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), @@ -252,7 +257,7 @@ class RadianceTile private ( clients = Seq( TLMasterParameters.v1( sourceId = IdRange(0, 1 << smemSourceWidth), - name = s"Vortex Core ${radianceParams.tileId} SharedMem Lane $i", + name = s"Vortex Core ${radianceParams.coreId} SharedMem Lane $i", requestFifo = true, supportsProbe = TransferSizes(1, lazyCoreParamsView.coreDataBytes), @@ -285,7 +290,7 @@ class RadianceTile private ( TLMasterParameters.v1( // FIXME: need to also respect imemSourceWidth sourceId = IdRange(0, 1 << dmemSourceWidth), - name = s"Vortex Core ${radianceParams.tileId} Mem Interface", + name = s"Vortex Core ${radianceParams.coreId} Mem Interface", requestFifo = true, supportsProbe = TransferSizes(16, 16), // FIXME: hardcoded supportsGet = TransferSizes(16, 
16), @@ -532,7 +537,7 @@ class RadianceTileModuleImp(outer: RadianceTile) core.io.imem.get(0).d <> imemTLAdapter.io.inResp performanceCounters(Seq(imemTLAdapter.io.inReq), Seq(imemTLAdapter.io.inResp), - desc = s"core${outer.tileId}-imem") + desc = s"core${outer.radianceParams.coreId}-imem") // now connect TL adapter downstream ports to the tile egress ports outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq @@ -641,7 +646,7 @@ class RadianceTileModuleImp(outer: RadianceTile) } performanceCounters(dmemTLAdapters.map(_.io.inReq), dmemTLAdapters.map(_.io.inResp), - desc = s"core${outer.tileId}-dmem") + desc = s"core${outer.radianceParams.coreId}-dmem") // now connect TL adapter downstream ports to the tile egress ports (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => @@ -702,7 +707,7 @@ class RadianceTileModuleImp(outer: RadianceTile) } performanceCounters(smemTLAdapters.map(_.io.inReq), smemTLAdapters.map(_.io.inResp), - desc = s"core${outer.tileId}-smem") + desc = s"core${outer.radianceParams.coreId}-smem") // now connect TL adapter downstream ports to the tile egress ports (smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) => diff --git a/src/main/scala/radiance/tile/VortexCore.scala b/src/main/scala/radiance/tile/VortexCore.scala index 08e206d..e42d0f1 100644 --- a/src/main/scala/radiance/tile/VortexCore.scala +++ b/src/main/scala/radiance/tile/VortexCore.scala @@ -117,11 +117,10 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl class Vortex(tile: RadianceTile)(implicit p: Parameters) extends BlackBox( - // Each Vortex core gets tied-off tileId of 0, 1, 2, 3, ... - // The actual MHARTID read by the program is different by warp, not core; - // see VX_csr_data that implements the read logic for CSR_MHARTID/GWID. + // Each Vortex core gets tied-off core id of 0, 1, 2, 3, which is global + // across multiple clusters. 
Map( - "CORE_ID" -> tile.tileParams.tileId, + "CORE_ID" -> tile.radianceParams.coreId, // TODO: can we get this as a parameter? "BOOTROM_HANG100" -> 0x10100, "NUM_THREADS" -> tile.numLsuLanes