diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index 83f8007..8c3b2dd 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -113,7 +113,7 @@ class WithRadianceGemmini(location: HierarchicalLocation, meshColumns = dim, tile_latency = 0, dma_maxbytes = site(CacheBlockBytes), - dma_buswidth = dim * 32, + dma_buswidth = 256, // TODO: parameterize tl_ext_mem_base = smKey.address, sp_banks = smKey.numBanks, sp_capacity = CapacityInKilobytes(smKey.size >> 10), diff --git a/src/main/scala/radiance/tile/AccNode.scala b/src/main/scala/radiance/tile/AccNode.scala index b6109bb..31a37d4 100644 --- a/src/main/scala/radiance/tile/AccNode.scala +++ b/src/main/scala/radiance/tile/AccNode.scala @@ -12,7 +12,7 @@ class AccBundle extends Bundle { val status = Input(UInt(1.W)) def dest(): UInt = { cmd.bits(7, 5) } - def masked(): UInt = { cmd.bits & x"ffffff1f".U } + def mask: UInt = x"ffffff1f".U } case class NullParams() diff --git a/src/main/scala/radiance/tile/GemminiTile.scala b/src/main/scala/radiance/tile/GemminiTile.scala index c0791c2..e2a5df9 100644 --- a/src/main/scala/radiance/tile/GemminiTile.scala +++ b/src/main/scala/radiance/tile/GemminiTile.scala @@ -188,14 +188,11 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer) ciscInst := 0.U.asTypeOf(ciscInstT) val tileSize = outer.gemminiParams.tileSize - val (boundsInst, spadQuartile) = if (tileSize == 4) { - (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"4_00040004".U), 0x80) - } else if (tileSize == 8) { - (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> x"8_00080008".U), 0x200) - } else { - (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, _.rs2 -> (tileSize | (tileSize << 16) | (tileSize << 32)).U), + val (boundsInst, spadQuartile) = (ciscInstT.Lit(_.inst -> 0x1220b07b.U, _.rs1 -> 0.U, + _.rs2 -> (tileSize | (tileSize << 16) | (BigInt(tileSize) << 32)).U), tileSize * tileSize * outer.gemminiParams.gemminiConfig.DIM) - } + println(s"gemmini cisc initialized with DIM=${outer.gemminiParams.gemminiConfig.DIM}, tileSize=${tileSize}") + println(f"boundsInst=${boundsInst.litValue}%x, tileSize=${tileSize}, quartile=${spadQuartile}") when (ciscValid) { assert(!accSlave.cmd.valid, "cisc state machine already busy") switch (ciscId) { @@ -217,6 +214,12 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer) ciscInstT.Lit(_.inst -> 0x1020b07b.U, _.rs1 -> 0x1.U, _.rs2 -> x"0_000002b8".U) )) } + is (3.U) { + ciscInst := microcodeEntry(Seq(boundsInst, + ciscInstT.Lit(_.inst -> 0x3020b07b.U, _.rs1 -> (spadQuartile * 1).U, _.rs2 -> (spadQuartile * 4).U), + ciscInstT.Lit(_.inst -> 0x1020b07b.U, _.rs1 -> 0x0.U, _.rs2 -> x"0_000002b8".U) + )) + } is (8.U) { val inst = Wire(ciscInstT) inst.inst := 0x1820b07b.U diff --git a/src/main/scala/radiance/tile/RadianceCluster.scala b/src/main/scala/radiance/tile/RadianceCluster.scala index 9c60854..382c9cc 100644 --- a/src/main/scala/radiance/tile/RadianceCluster.scala +++ b/src/main/scala/radiance/tile/RadianceCluster.scala @@ -5,7 +5,7 @@ package radiance.tile import chisel3._ import chisel3.util._ -import freechips.rocketchip.diplomacy.{AddressSet, BufferParams, ClockCrossingType, TransferSizes} +import freechips.rocketchip.diplomacy.{AddressSet, BigIntHexContext, BufferParams, ClockCrossingType, TransferSizes} import org.chipsalliance.diplomacy.lazymodule._ import freechips.rocketchip.prci.ClockSinkParameters import freechips.rocketchip.subsystem._ @@ -183,8 +183,8 @@ class RadianceCluster ( guard_monitors { implicit p => dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := node } - val fanout = Seq.fill(sp_subbanks) { - connect_xbar_name(dist, Some(s"spad_g${gemmini_idx}_fanout_$suffix")) + val fanout = Seq.tabulate(sp_subbanks) { w => + connect_xbar_name(dist, Some(s"spad_g${gemmini_idx}w${w}_fanout_$suffix")) } Seq.fill(smem_width / sp_width_bytes)(fanout).flatten // smem wider than spad, duplicate masters } @@ -336,8 +336,8 @@ class RadianceCluster ( val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m) // printf and perf counter buffer - TLRAM(AddressSet(smem_key.address + smem_size, numCoresInCluster * 0x200 - 1)) := - traceTLNode := TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode + TLRAM(AddressSet(smem_key.address + smem_size, numCoresInCluster * 0x200 - 1)) := traceTLNode := + TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode p(RadianceFrameBufferKey).foreach { key => val fb = LazyModule(new FrameBuffer(key.baseAddress, key.width, key.size, key.validAddress, key.fbName)) @@ -381,7 +381,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( b.resp <> synchronizer.io.resp // broadcast } - val coreAcc = outer.radianceAccSlaveNodes.head.in.head._1 + val coreAccs = outer.radianceAccSlaveNodes.map(_.in.head._1) val gemminiAccs = outer.gemminiAccMasterNodes.map(_.out.head._1) // val gemminiTileAcc = outer.gemminiTile.accSlaveNode.in.head._1 @@ -389,12 +389,14 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( // gemminiAcc.status := gemminiTileAcc.status gemminiAccs.zipWithIndex.foreach { case (g, gi) => - g.cmd.bits := coreAcc.masked - g.cmd.valid := coreAcc.cmd.valid && (coreAcc.dest === gi.U) + val active = coreAccs.map(acc => acc.cmd.valid && (acc.dest() === gi.U)) + val selected = PriorityEncoder(active) + g.cmd.bits := VecInit(coreAccs.map(_.cmd.bits))(selected) & g.mask + g.cmd.valid := VecInit(active).reduceTree(_ || _) } // this might need some more tweaking (e.g. bitmask instead of or) - outer.radianceAccSlaveNodes.foreach(_.in.head._1.status := VecInit(gemminiAccs.map(_.status)).reduceTree(_ | _)) + coreAccs.foreach(_.status := VecInit(gemminiAccs.map(_.status)).reduceTree(_ | _)) (outer.traceTLNode.in.map(_._1) zip outer.traceTLNode.out.map(_._1)).foreach { case (i, o) => o.a <> i.a