diff --git a/src/main/scala/radiance/memory/DoubleOutXbar.scala b/src/main/scala/radiance/memory/DoubleOutXbar.scala index 20c5eec..67a9158 100644 --- a/src/main/scala/radiance/memory/DoubleOutXbar.scala +++ b/src/main/scala/radiance/memory/DoubleOutXbar.scala @@ -2,7 +2,7 @@ package radiance.memory import chisel3._ import chisel3.util._ -import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes, IdRange} +import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes, IdRange, BufferParams} import freechips.rocketchip.tilelink._ import freechips.rocketchip.util.BundleField import org.chipsalliance.cde.config.Parameters @@ -77,23 +77,29 @@ class DuplicatorNode(override val name: String = "dup") sourceEnq.bits := nodeIn.a.bits.source val idQueue = Queue(sourceEnq, entries = 4, pipe = false, flow = false) + val srcMatch = nodeOuts.map(_.d.bits.source(inSourceWidth - 1, 0) === idQueue.bits) - idQueue.ready := nodeIn.d.ready && srcMatch.reduce(_ || _) + idQueue.ready := nodeIn.d.fire assert(sourceEnq.fire === nodeIn.a.fire) assert(idQueue.fire === nodeIn.d.fire) - (nodeOuts zip srcMatch).foreach { case (o, m) => + (nodeOuts lazyZip srcMatch lazyZip Seq(0, inSourceEnd)).foreach { case (o, m, p) => o.a.bits := nodeIn.a.bits - o.a.bits.source := nodeIn.a.bits.source | inSourceEnd.U + o.a.bits.source := nodeIn.a.bits.source | p.U o.a.valid := nodeIn.a.valid - nodeIn.d.bits := o.d.bits - nodeIn.d.bits.source := o.d.bits.source(inSourceWidth - 1, 0) - nodeIn.d.valid := o.d.valid o.d.ready := nodeIn.d.ready && m } - assert(!(nodeOuts.head.a.ready && nodeOuts.last.a.ready) || !nodeIn.a.valid, "double output fire") + nodeIn.d.bits := MuxCase(DontCare, (nodeOuts zip srcMatch).map { case (o, m) => + m -> o.d.bits + }) + nodeIn.d.bits.source := MuxCase(DontCare, (nodeOuts zip srcMatch).map { case (o, m) => + m -> o.d.bits.source(inSourceWidth - 1, 0) + }) + nodeIn.d.valid := (nodeOuts zip srcMatch).map { case (o, m) => o.d.valid && m }.reduce(_ || _) nodeIn.a.ready := nodeOuts.map(_.a.ready).reduce(_ || _) && sourceEnq.ready + + assert(!(nodeOuts.head.a.ready && nodeOuts.last.a.ready) || !nodeIn.a.valid, "double output fire") } } @@ -105,24 +111,30 @@ object DuplicatorNode { class DoubleOutXbar(clients: Seq[TLNode], override val name: String = "2o_xbar") (implicit p: Parameters) extends LazyModule { - val xbar0 = TLXbar(TLArbiter.lowestIndexFirst) - val xbar1 = TLXbar(TLArbiter.lowestIndexFirst) + val xbar0 = TLXbar(TLArbiter.lowestIndexFirst, Some("double_out_xbar0")) + val xbar1 = TLXbar(TLArbiter.lowestIndexFirst, Some("double_out_xbar1")) implicit val disableMonitors: Boolean = false + val bufGen = () => TLBuffer(ace = BufferParams(0), bd = BufferParams(2, flow = false, pipe = false)) val dupedIds = clients.map(connectOne(_, DuplicatorNode.apply)).map { c => val id0 = connectOne(c, TLIdentityNode.apply) val id1 = connectOne(c, TLIdentityNode.apply) - xbar0 := id0 - xbar1 := id1 + xbar0 := connectOne(id0, bufGen) + xbar1 := connectOne(id1, bufGen) Seq(id0, id1) }.transpose lazy val module = new LazyModuleImp(this) { val id0InReadys = VecInit(dupedIds.head.map(_.in.head._1.a.ready)).asUInt val id1InValids = VecInit(dupedIds.last.map(_.in.head._1.a.valid)).asUInt - (dupedIds.last.map(_.out.head._1.a.valid) zip (id1InValids & (~id0InReadys).asUInt).asBools) + val id1OutValids = dupedIds.last.map(_.out.head._1.a.valid) + val id1InReadys = dupedIds.last.map(_.in.head._1.a.ready) + val id1OutReadys = VecInit(dupedIds.last.map(_.out.head._1.a.ready)).asUInt + (id1OutValids zip (id1InValids & (~id0InReadys).asUInt).asBools) .foreach { case (o, i) => o := i } + (id1InReadys zip (id1OutReadys & (~id0InReadys).asUInt).asBools) + .foreach { case (i, o) => i := o } } } diff --git a/src/main/scala/radiance/memory/RWSplitterNode.scala b/src/main/scala/radiance/memory/RWSplitterNode.scala index 5464b72..1cfceba 100644 --- a/src/main/scala/radiance/memory/RWSplitterNode.scala +++ b/src/main/scala/radiance/memory/RWSplitterNode.scala @@ -16,6 +16,7 @@ class RWSplitterNode(visibility: Option[AddressSet], override val name: String = // splits & arbitrates them into one client node per type of operation; // there will be N incoming edges, two outgoing edges, with two N:1 muxes; // it keeps the read and write channels fully separate to allow parallel processing. + suggestName(name) val node = TLNexusNode( clientFn = { seq => val in_mapping = TLXbar.mapInputIds(seq) @@ -155,6 +156,11 @@ object RWSplitterNode { LazyModule(new RWSplitterNode(None, name = valName.value)).node } + def apply(name: String) + (implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = { + LazyModule(new RWSplitterNode(None, name = name)).node + } + def apply(visibility: AddressSet) (implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = { apply(visibility, valName.value) diff --git a/src/main/scala/radiance/memory/XbarWithExtPolicy.scala b/src/main/scala/radiance/memory/XbarWithExtPolicy.scala index 071c106..58131e1 100644 --- a/src/main/scala/radiance/memory/XbarWithExtPolicy.scala +++ b/src/main/scala/radiance/memory/XbarWithExtPolicy.scala @@ -53,10 +53,12 @@ object XbarWithExtPolicy { object XbarWithExtPolicyNoFallback { def apply(nameSuffix: Option[String] = None) - (implicit p: Parameters): (XbarWithExtPolicy, TLIdentityNode) = { + (implicit p: Parameters): (XbarWithExtPolicy, TLIdentityNode, TLIdentityNode) = { val inIdNode = TLIdentityNode() + val outIdNode = TLIdentityNode() val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix, false)) xbar.node :=* inIdNode - (xbar, inIdNode) + outIdNode :=* xbar.node + (xbar, inIdNode, outIdNode) } } diff --git a/src/main/scala/radiance/tile/VirgoSharedMemComponents.scala b/src/main/scala/radiance/tile/VirgoSharedMemComponents.scala index 8cc0072..ae143eb 100644 --- a/src/main/scala/radiance/tile/VirgoSharedMemComponents.scala +++ b/src/main/scala/radiance/tile/VirgoSharedMemComponents.scala @@ -108,7 +108,8 @@ class VirgoSharedMemComponents( // tensor core read nodes val tcDistNodes = Seq.fill(smemBanks)(tcNodeFanouts.map(connectOne(_, () => DistributorNode(smemWidth, wordSize)))) val tcNodes = tcDistNodes.map { tcBank => - Seq.fill(smemSubbanks)(tcBank.map(connectOne(_, () => TLBuffer(BufferParams(2, false, false)))).map(connectXbarName(_, Some("tc_dist_fanout")))) + Seq.fill(smemSubbanks)(tcBank.map(connectOne(_, + () => TLBuffer(BufferParams(2, false, false)))).map(connectXbarName(_, Some("tc_dist_fanout")))) } // (banks, subbanks, tc client) val unalignedRWNodes: ArrayBuffer[ArrayBuffer[TLNexusNode]] = // mutable for readability @@ -195,7 +196,7 @@ class VirgoSharedMemComponents( val laneSerialXbar = laneSerialXbars.get(rw)(lid) laneSerialXbar._1.policySlaveNode := coreSerialPolicy.get(rw)(lid) coresRW.foreach(laneSerialXbar._2 := _) - connectXbarName(connectOne(laneSerialXbar._1.node, TLEphemeralNode.apply), Some(s"lane_${lid}_serial_out")) + connectXbarName(connectOne(laneSerialXbar._3, TLEphemeralNode.apply), Some(s"lane_${lid}_serial_out")) } } case NotSerialized => Seq.fill(2)(unalignedRWNodes.toSeq.flatten.map(connectXbar.apply)) @@ -216,7 +217,7 @@ class VirgoSharedMemComponents( (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) } else { - val splitterNodes = radianceSmemFanout.map { connectOne(_, RWSplitterNode.apply) } + val splitterNodes = radianceSmemFanout.map { connectOne(_, () => RWSplitterNode("rad_fanout_splitter")) } // these nodes access an entire line simultaneously val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) => @@ -259,7 +260,7 @@ class VirgoSharedMemComponentsImp[T <: VirgoSharedMemComponents] (xbarsRW zip policiesRW).foreach { case (xbars, policies) => // for each lane, if any core is valid val coreValids = xbars.map(_._2.in.map(_._1)).transpose.map { core => VecInit(core.map(_.a.valid)).asUInt.orR } - val select = xbars.map(_._2.out.map(_._1)).transpose.map { core => VecInit(core.map(_.a.ready)).asUInt.orR } + val select = xbars.map(_._3.in.map(_._1)).transpose.map { core => VecInit(core.map(_.a.fire)).asUInt.orR } val coreSelect = TLArbiter.roundRobin(outer.numCores, VecInit(coreValids).asUInt, VecInit(select).asUInt.orR) // TODO: roll this into XbarWithExtPolicy xbars.foreach { lane =>