emergency push
This commit is contained in:
134
src/main/scala/radiance/memory/DoubleOutXbar.scala
Normal file
134
src/main/scala/radiance/memory/DoubleOutXbar.scala
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
package radiance.memory
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes, IdRange}
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.util.BundleField
|
||||||
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
import org.chipsalliance.diplomacy.ValName
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule._
|
||||||
|
|
||||||
|
class DuplicatorNode(override val name: String = "dup")
|
||||||
|
(implicit p: Parameters) extends LazyModule {
|
||||||
|
// tilelink node that has two identical managers for parallelizing request processing
|
||||||
|
// one of the two managers must deassert ready when A channel is valid
|
||||||
|
|
||||||
|
val node = TLNexusNode(
|
||||||
|
clientFn = { seq =>
|
||||||
|
val inMapping = TLXbar.mapInputIds(seq)
|
||||||
|
val sourceRange = IdRange(inMapping.map(_.start).min, inMapping.map(_.end).max)
|
||||||
|
assert((sourceRange.start == 0) && isPow2(sourceRange.end))
|
||||||
|
|
||||||
|
val visibilities = seq.flatMap(_.masters.flatMap(_.visibility))
|
||||||
|
val unifiedVis = if (visibilities.map(_ == AddressSet.everything).reduce(_ || _)) Seq(AddressSet.everything)
|
||||||
|
else AddressSet.unify(visibilities)
|
||||||
|
|
||||||
|
seq.head.v1copy(
|
||||||
|
echoFields = BundleField.union(seq.flatMap(_.echoFields)),
|
||||||
|
requestFields = BundleField.union(seq.flatMap(_.requestFields)),
|
||||||
|
responseKeys = seq.flatMap(_.responseKeys).distinct,
|
||||||
|
minLatency = seq.map(_.minLatency).min,
|
||||||
|
clients = Seq.tabulate(2) { i =>
|
||||||
|
TLMasterParameters.v1(
|
||||||
|
name = s"${name}_read_client",
|
||||||
|
sourceId = sourceRange.shift(sourceRange.size * i),
|
||||||
|
visibility = unifiedVis,
|
||||||
|
supportsProbe = TransferSizes.mincover(seq.map(_.anyEmitClaims.get)),
|
||||||
|
supportsGet = TransferSizes.mincover(seq.map(_.anyEmitClaims.get)),
|
||||||
|
supportsPutFull = TransferSizes.mincover(seq.map(_.anyEmitClaims.putFull)),
|
||||||
|
supportsPutPartial = TransferSizes.mincover(seq.map(_.anyEmitClaims.putPartial))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
},
|
||||||
|
managerFn = { seq =>
|
||||||
|
println(f"combined address range of $name managers: " +
|
||||||
|
f"${AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address)))}, supports:" +
|
||||||
|
f"${seq.map(_.anySupportClaims).reduce(_ mincover _)}")
|
||||||
|
|
||||||
|
seq.head.v1copy(
|
||||||
|
responseFields = BundleField.union(seq.flatMap(_.responseFields)),
|
||||||
|
requestKeys = seq.flatMap(_.requestKeys).distinct,
|
||||||
|
minLatency = seq.map(_.minLatency).min,
|
||||||
|
endSinkId = TLXbar.mapOutputIds(seq).map(_.end).max,
|
||||||
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
|
name = Some(s"${name}_manager"),
|
||||||
|
address = AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address))),
|
||||||
|
supports = seq.map(_.anySupportClaims).reduce(_ mincover _),
|
||||||
|
fifoId = Some(0),
|
||||||
|
))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
lazy val module = new LazyModuleImp(this) {
|
||||||
|
assert(node.out.length == 2, s"$name should have 2 outgoing edges but has ${node.out.length}")
|
||||||
|
assert(node.in.length == 1, s"$name should have one incoming edge but has ${node.in.length}")
|
||||||
|
|
||||||
|
val inSourceWidth = log2Ceil(node.in.head._2.master.endSourceId)
|
||||||
|
val inSourceEnd = 1 << inSourceWidth
|
||||||
|
|
||||||
|
val nodeIn = node.in.head._1
|
||||||
|
val nodeOuts = node.out.map(_._1)
|
||||||
|
|
||||||
|
val sourceEnq = Wire(DecoupledIO(UInt(inSourceWidth.W)))
|
||||||
|
sourceEnq.valid := nodeIn.a.valid && nodeOuts.map(_.a.ready).reduce(_ || _)
|
||||||
|
sourceEnq.bits := nodeIn.a.bits.source
|
||||||
|
|
||||||
|
val idQueue = Queue(sourceEnq, entries = 4, pipe = false, flow = false)
|
||||||
|
val srcMatch = nodeOuts.map(_.d.bits.source(inSourceWidth - 1, 0) === idQueue.bits)
|
||||||
|
idQueue.ready := nodeIn.d.ready && srcMatch.reduce(_ || _)
|
||||||
|
|
||||||
|
assert(sourceEnq.fire === nodeIn.a.fire)
|
||||||
|
assert(idQueue.fire === nodeIn.d.fire)
|
||||||
|
|
||||||
|
(nodeOuts zip srcMatch).foreach { case (o, m) =>
|
||||||
|
o.a.bits := nodeIn.a.bits
|
||||||
|
o.a.bits.source := nodeIn.a.bits.source | inSourceEnd.U
|
||||||
|
o.a.valid := nodeIn.a.valid
|
||||||
|
nodeIn.d.bits := o.d.bits
|
||||||
|
nodeIn.d.bits.source := o.d.bits.source(inSourceWidth - 1, 0)
|
||||||
|
nodeIn.d.valid := o.d.valid
|
||||||
|
o.d.ready := nodeIn.d.ready && m
|
||||||
|
}
|
||||||
|
assert(!(nodeOuts.head.a.ready && nodeOuts.last.a.ready) || !nodeIn.a.valid, "double output fire")
|
||||||
|
nodeIn.a.ready := nodeOuts.map(_.a.ready).reduce(_ || _) && sourceEnq.ready
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
object DuplicatorNode {
|
||||||
|
def apply()(implicit p: Parameters): TLNexusNode = {
|
||||||
|
LazyModule(new DuplicatorNode()).node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class DoubleOutXbar(clients: Seq[TLNode], override val name: String = "2o_xbar")
|
||||||
|
(implicit p: Parameters) extends LazyModule {
|
||||||
|
val xbar0 = TLXbar(TLArbiter.lowestIndexFirst)
|
||||||
|
val xbar1 = TLXbar(TLArbiter.lowestIndexFirst)
|
||||||
|
|
||||||
|
implicit val disableMonitors: Boolean = false
|
||||||
|
|
||||||
|
val dupedIds = clients.map(connectOne(_, DuplicatorNode.apply)).map { c =>
|
||||||
|
val id0 = connectOne(c, TLIdentityNode.apply)
|
||||||
|
val id1 = connectOne(c, TLIdentityNode.apply)
|
||||||
|
xbar0 := id0
|
||||||
|
xbar1 := id1
|
||||||
|
Seq(id0, id1)
|
||||||
|
}.transpose
|
||||||
|
|
||||||
|
lazy val module = new LazyModuleImp(this) {
|
||||||
|
val id0InReadys = VecInit(dupedIds.head.map(_.in.head._1.a.ready)).asUInt
|
||||||
|
val id1InValids = VecInit(dupedIds.last.map(_.in.head._1.a.valid)).asUInt
|
||||||
|
(dupedIds.last.map(_.out.head._1.a.valid) zip (id1InValids & (~id0InReadys).asUInt).asBools)
|
||||||
|
.foreach { case (o, i) => o := i }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
object DoubleOutXbar {
|
||||||
|
def apply(clients: Seq[TLNode])(implicit p: Parameters): Seq[TLNode] = {
|
||||||
|
val doubleOutXbar: DoubleOutXbar = LazyModule(new DoubleOutXbar(clients))
|
||||||
|
Seq(doubleOutXbar.xbar0, doubleOutXbar.xbar1)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,16 +16,20 @@ import radiance.memory._
|
|||||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||||
|
|
||||||
sealed trait RadianceSmemSerialization
|
sealed trait RadianceSmemSerialization
|
||||||
|
|
||||||
case object FullySerialized extends RadianceSmemSerialization
|
case object FullySerialized extends RadianceSmemSerialization
|
||||||
case object CoreSerialized extends RadianceSmemSerialization
|
case object CoreSerialized extends RadianceSmemSerialization
|
||||||
case object NotSerialized extends RadianceSmemSerialization
|
case object NotSerialized extends RadianceSmemSerialization
|
||||||
|
|
||||||
|
sealed trait MemType
|
||||||
|
case object TwoPort extends MemType
|
||||||
|
case object TwoReadOneWrite extends MemType
|
||||||
|
|
||||||
case class RadianceSharedMemKey(address: BigInt,
|
case class RadianceSharedMemKey(address: BigInt,
|
||||||
size: Int,
|
size: Int,
|
||||||
numBanks: Int,
|
numBanks: Int,
|
||||||
numWords: Int,
|
numWords: Int,
|
||||||
wordSize: Int = 4,
|
wordSize: Int = 4,
|
||||||
|
memType: MemType = TwoPort,
|
||||||
strideByWord: Boolean = true,
|
strideByWord: Boolean = true,
|
||||||
filterAligned: Boolean = true,
|
filterAligned: Boolean = true,
|
||||||
disableMonitors: Boolean = true,
|
disableMonitors: Boolean = true,
|
||||||
@@ -197,6 +201,7 @@ class WithRadianceSharedMem(address: BigInt,
|
|||||||
size: Int,
|
size: Int,
|
||||||
numBanks: Int,
|
numBanks: Int,
|
||||||
numWords: Int,
|
numWords: Int,
|
||||||
|
memType: MemType = TwoPort,
|
||||||
strideByWord: Boolean = true,
|
strideByWord: Boolean = true,
|
||||||
filterAligned: Boolean = true,
|
filterAligned: Boolean = true,
|
||||||
disableMonitors: Boolean = true,
|
disableMonitors: Boolean = true,
|
||||||
@@ -205,8 +210,8 @@ class WithRadianceSharedMem(address: BigInt,
|
|||||||
case RadianceSharedMemKey => {
|
case RadianceSharedMemKey => {
|
||||||
require(isPow2(size) && size >= 1024)
|
require(isPow2(size) && size >= 1024)
|
||||||
Some(RadianceSharedMemKey(
|
Some(RadianceSharedMemKey(
|
||||||
address, size, numBanks, numWords, 4, strideByWord,
|
address, size, numBanks, numWords, 4, memType,
|
||||||
filterAligned, disableMonitors, serializeUnaligned
|
strideByWord, filterAligned, disableMonitors, serializeUnaligned
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -7,8 +7,9 @@ import org.chipsalliance.cde.config.Parameters
|
|||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes}
|
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes}
|
||||||
import gemmini.Pipeline
|
import gemmini.Pipeline
|
||||||
import radiance.subsystem.RadianceSharedMemKey
|
import radiance.subsystem.{RadianceSharedMemKey, TwoPort, TwoReadOneWrite}
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
|
|
||||||
import scala.collection.mutable.ArrayBuffer
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
|
||||||
abstract class RadianceSmemNodeProvider {
|
abstract class RadianceSmemNodeProvider {
|
||||||
@@ -49,60 +50,72 @@ class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
|||||||
require(isPow2(smemSubbanks))
|
require(isPow2(smemSubbanks))
|
||||||
(0 until smemBanks).flatMap { bid =>
|
(0 until smemBanks).flatMap { bid =>
|
||||||
(0 until smemSubbanks).map { wid =>
|
(0 until smemSubbanks).map { wid =>
|
||||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
Seq.fill(smemKey.memType match {
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
case TwoPort => 1
|
||||||
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
|
case TwoReadOneWrite => 2
|
||||||
address = Seq(AddressSet(
|
})(
|
||||||
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
smemDepth * smemWidth - smemWidth + wordSize - 1
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
|
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
|
||||||
|
address = Seq(AddressSet(
|
||||||
|
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
||||||
|
smemDepth * smemWidth - smemWidth + wordSize - 1
|
||||||
|
)),
|
||||||
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
|
get = TransferSizes(wordSize, wordSize)),
|
||||||
|
fifoId = Some(0)
|
||||||
)),
|
)),
|
||||||
supports = TLMasterToSlaveTransferSizes(
|
beatBytes = wordSize
|
||||||
get = TransferSizes(wordSize, wordSize)),
|
)))
|
||||||
fifoId = Some(0)
|
) ++ Seq(
|
||||||
)),
|
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
beatBytes = wordSize
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
))
|
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
|
||||||
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
address = Seq(AddressSet(
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
||||||
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
|
smemDepth * smemWidth - smemWidth + wordSize - 1
|
||||||
address = Seq(AddressSet(
|
)),
|
||||||
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
smemDepth * smemWidth - smemWidth + wordSize - 1
|
putFull = TransferSizes(wordSize, wordSize),
|
||||||
|
putPartial = TransferSizes(wordSize, wordSize)),
|
||||||
|
fifoId = Some(0)
|
||||||
)),
|
)),
|
||||||
supports = TLMasterToSlaveTransferSizes(
|
beatBytes = wordSize
|
||||||
putFull = TransferSizes(wordSize, wordSize),
|
)))
|
||||||
putPartial = TransferSizes(wordSize, wordSize)),
|
)
|
||||||
fifoId = Some(0)
|
|
||||||
)),
|
|
||||||
beatBytes = wordSize
|
|
||||||
))))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
(0 until smemBanks).map { bank =>
|
(0 until smemBanks).map { bank =>
|
||||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
Seq.fill(smemKey.memType match {
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
case TwoPort => 1
|
||||||
name = Some(f"sp_bank${bank}_read_mgr"),
|
case TwoReadOneWrite => 2
|
||||||
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
})(
|
||||||
smemDepth * smemWidth - 1)),
|
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
supports = TLMasterToSlaveTransferSizes(
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
get = TransferSizes(1, smemWidth)),
|
name = Some(f"sp_bank${bank}_read_mgr"),
|
||||||
fifoId = Some(0)
|
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
||||||
)),
|
smemDepth * smemWidth - 1)),
|
||||||
beatBytes = smemWidth
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
))
|
get = TransferSizes(1, smemWidth)),
|
||||||
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
fifoId = Some(0)
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
)),
|
||||||
name = Some(f"sp_bank${bank}_write_mgr"),
|
beatBytes = smemWidth
|
||||||
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
)))
|
||||||
smemDepth * smemWidth - 1)),
|
) ++ Seq(
|
||||||
supports = TLMasterToSlaveTransferSizes(
|
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
putFull = TransferSizes(1, smemWidth),
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
putPartial = TransferSizes(1, smemWidth)),
|
name = Some(f"sp_bank${bank}_write_mgr"),
|
||||||
fifoId = Some(0)
|
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
||||||
)),
|
smemDepth * smemWidth - 1)),
|
||||||
beatBytes = smemWidth
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
))))
|
putFull = TransferSizes(1, smemWidth),
|
||||||
|
putPartial = TransferSizes(1, smemWidth)),
|
||||||
|
fifoId = Some(0)
|
||||||
|
)),
|
||||||
|
beatBytes = smemWidth
|
||||||
|
)))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,19 +128,15 @@ class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
|||||||
|
|
||||||
if (strideByWord) {
|
if (strideByWord) {
|
||||||
smemBankMgrs.grouped(smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
smemBankMgrs.grouped(smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
||||||
bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
bankMgrs.zipWithIndex.foreach { case (ports, wid) =>
|
||||||
// TODO: this should be a coordinated round robin
|
val readPorts = ports.init
|
||||||
val subbankRXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
val writePort = ports.last
|
||||||
val subbankWXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
|
||||||
subbankRXbar.suggestName(s"smem_b${bid}_w${wid}_r_xbar")
|
|
||||||
subbankWXbar.suggestName(s"smem_b${bid}_w${wid}_w_xbar")
|
|
||||||
|
|
||||||
guardMonitors { implicit p =>
|
guardMonitors { implicit p =>
|
||||||
r := subbankRXbar.node
|
|
||||||
w := subbankWXbar.node
|
|
||||||
|
|
||||||
val urXbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}"))
|
val urXbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}"))
|
||||||
val uwXbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}"))
|
val uwXbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}"))
|
||||||
|
|
||||||
|
// connect policy nodes
|
||||||
val rPolicyNode = ExtPolicyMasterNode(uniformRNodes(bid)(wid).length)
|
val rPolicyNode = ExtPolicyMasterNode(uniformRNodes(bid)(wid).length)
|
||||||
val wPolicyNode = ExtPolicyMasterNode(uniformWNodes(bid)(wid).length)
|
val wPolicyNode = ExtPolicyMasterNode(uniformWNodes(bid)(wid).length)
|
||||||
urXbar.policySlaveNode := rPolicyNode
|
urXbar.policySlaveNode := rPolicyNode
|
||||||
@@ -135,6 +144,7 @@ class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
|||||||
uniformPolicyNodes.head(bid)(wid) = rPolicyNode
|
uniformPolicyNodes.head(bid)(wid) = rPolicyNode
|
||||||
uniformPolicyNodes.last(bid)(wid) = wPolicyNode
|
uniformPolicyNodes.last(bid)(wid) = wPolicyNode
|
||||||
|
|
||||||
|
// connect clients
|
||||||
(Seq(urXbar, uwXbar) lazyZip uniformNodesIn lazyZip Seq(uniformRNodes, uniformWNodes))
|
(Seq(urXbar, uwXbar) lazyZip uniformNodesIn lazyZip Seq(uniformRNodes, uniformWNodes))
|
||||||
.foreach { case (xbar, idBuf, uNodes) =>
|
.foreach { case (xbar, idBuf, uNodes) =>
|
||||||
|
|
||||||
@@ -145,17 +155,33 @@ class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uniformNodesOut.head(bid)(wid) = TLIdentityNode()
|
uniformNodesOut.head(bid)(wid) = connectOne(urXbar.node, TLIdentityNode.apply)
|
||||||
uniformNodesOut.last(bid)(wid) = TLIdentityNode()
|
uniformNodesOut.last(bid)(wid) = connectOne(uwXbar.node, TLIdentityNode.apply)
|
||||||
subbankRXbar.node := uniformNodesOut.head(bid)(wid) := urXbar.node
|
|
||||||
subbankWXbar.node := uniformNodesOut.last(bid)(wid) := uwXbar.node
|
|
||||||
|
|
||||||
nonuniformRNodes.foreach( subbankRXbar.node :=* _ )
|
// connect memory
|
||||||
nonuniformWNodes.foreach( subbankWXbar.node :=* _ )
|
smemKey.memType match {
|
||||||
|
case TwoPort => {
|
||||||
|
val subbankRXbar = TLXbar(TLArbiter.lowestIndexFirst, Some(s"smem_b${bid}_w${wid}_r_xbar"))
|
||||||
|
subbankRXbar := uniformNodesOut.head(bid)(wid)
|
||||||
|
nonuniformRNodes.foreach( subbankRXbar :=* _ )
|
||||||
|
readPorts.head := subbankRXbar
|
||||||
|
}
|
||||||
|
case TwoReadOneWrite => {
|
||||||
|
val subbankRXbars = DoubleOutXbar(Seq(uniformNodesOut.head(bid)(wid)) ++ nonuniformRNodes)
|
||||||
|
(readPorts zip subbankRXbars).foreach { case (rp, sbx) => rp := sbx }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val subbankWXbar = TLXbar(TLArbiter.lowestIndexFirst, Some(s"smem_b${bid}_w${wid}_w_xbar"))
|
||||||
|
writePort := subbankWXbar
|
||||||
|
subbankWXbar := uniformNodesOut.last(bid)(wid)
|
||||||
|
nonuniformWNodes.foreach( subbankWXbar :=* _ )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // not stride by word
|
} else { // not stride by word
|
||||||
|
require(smemKey.memType == TwoPort, "double read ports not implemented")
|
||||||
|
|
||||||
val smemRXbar = TLXbar()
|
val smemRXbar = TLXbar()
|
||||||
val smemWXbar = TLXbar()
|
val smemWXbar = TLXbar()
|
||||||
|
|
||||||
@@ -184,23 +210,25 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
|
|
||||||
val smNodesImp = outer.providerImp.map(impFn => impFn(outer.smNodes))
|
val smNodesImp = outer.providerImp.map(impFn => impFn(outer.smNodes))
|
||||||
|
|
||||||
def makeBuffer[U <: Data](mem: TwoPortSyncMem[U], rNode: TLBundle, rEdge: TLEdgeIn,
|
case class ReadPort[U <: Data](ren: Bool, data: U)
|
||||||
wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
case class WritePort[U <: Data](wen: Bool, data: U, mask: UInt)
|
||||||
mem.io.ren := rNode.a.fire
|
|
||||||
|
|
||||||
val dataPipeIn = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
def makeReadBuffer[U <: Data](port: ReadPort[U], rNode: TLBundle, rEdge: TLEdgeIn): Unit = {
|
||||||
dataPipeIn.valid := RegNext(mem.io.ren)
|
port.ren := rNode.a.fire
|
||||||
dataPipeIn.bits := mem.io.rdata
|
|
||||||
|
val dataPipeIn = Wire(DecoupledIO(port.data.cloneType))
|
||||||
|
dataPipeIn.valid := RegNext(port.ren)
|
||||||
|
dataPipeIn.bits := port.data
|
||||||
|
|
||||||
val metadataPipeIn = Wire(DecoupledIO(new Bundle {
|
val metadataPipeIn = Wire(DecoupledIO(new Bundle {
|
||||||
val source = rNode.a.bits.source.cloneType
|
val source = rNode.a.bits.source.cloneType
|
||||||
val size = rNode.a.bits.size.cloneType
|
val size = rNode.a.bits.size.cloneType
|
||||||
}))
|
}))
|
||||||
metadataPipeIn.valid := mem.io.ren
|
metadataPipeIn.valid := port.ren
|
||||||
metadataPipeIn.bits.source := rNode.a.bits.source
|
metadataPipeIn.bits.source := rNode.a.bits.source
|
||||||
metadataPipeIn.bits.size := rNode.a.bits.size
|
metadataPipeIn.bits.size := rNode.a.bits.size
|
||||||
|
|
||||||
val sramReadBackupReg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType)))
|
val sramReadBackupReg = RegInit(0.U.asTypeOf(Valid(port.data.cloneType)))
|
||||||
|
|
||||||
val dataPipeInst = Module(new Pipeline(dataPipeIn.bits.cloneType, 1)())
|
val dataPipeInst = Module(new Pipeline(dataPipeIn.bits.cloneType, 1)())
|
||||||
dataPipeInst.io.in <> dataPipeIn
|
dataPipeInst.io.in <> dataPipeIn
|
||||||
@@ -214,12 +242,12 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
assert(!sramReadBackupReg.valid) // backup reg should be empty
|
assert(!sramReadBackupReg.valid) // backup reg should be empty
|
||||||
assert(!metadataPipeIn.ready) // metadata should be filled previous cycle
|
assert(!metadataPipeIn.ready) // metadata should be filled previous cycle
|
||||||
sramReadBackupReg.valid := true.B
|
sramReadBackupReg.valid := true.B
|
||||||
sramReadBackupReg.bits := mem.io.rdata
|
sramReadBackupReg.bits := port.data
|
||||||
}.otherwise {
|
}.otherwise {
|
||||||
assert(dataPipeIn.ready || !dataPipeIn.valid) // do not skip any response
|
assert(dataPipeIn.ready || !dataPipeIn.valid) // do not skip any response
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(metadataPipeIn.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready
|
assert(metadataPipeIn.fire || !port.ren) // when requesting sram, metadata needs to be ready
|
||||||
assert(rNode.d.fire === metadataPipe.fire) // metadata dequeues iff D fires
|
assert(rNode.d.fire === metadataPipe.fire) // metadata dequeues iff D fires
|
||||||
|
|
||||||
// when D becomes ready, and data pipe has emptied, time for backup to empty
|
// when D becomes ready, and data pipe has emptied, time for backup to empty
|
||||||
@@ -238,11 +266,12 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
rNode.a.ready := rNode.d.ready && !(dataPipe.valid && sramReadBackupReg.valid)
|
rNode.a.ready := rNode.d.ready && !(dataPipe.valid && sramReadBackupReg.valid)
|
||||||
dataPipe.ready := rNode.d.ready
|
dataPipe.ready := rNode.d.ready
|
||||||
metadataPipe.ready := rNode.d.ready
|
metadataPipe.ready := rNode.d.ready
|
||||||
|
}
|
||||||
|
|
||||||
// WRITE
|
def makeWriteBuffer[U <: Data](port: WritePort[U], wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
||||||
mem.io.wen := RegNext(wNode.a.fire)
|
port.wen := RegNext(wNode.a.fire)
|
||||||
mem.io.wdata := RegNext(wNode.a.bits.data)
|
port.data := RegNext(wNode.a.bits.data)
|
||||||
mem.io.mask := RegNext(wNode.a.bits.mask)
|
port.mask := RegNext(wNode.a.bits.mask)
|
||||||
|
|
||||||
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
||||||
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
||||||
@@ -251,21 +280,79 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
wNode.d <> Queue(writeResp, 2)
|
wNode.d <> Queue(writeResp, 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
// read/write access counter for smem banks
|
|
||||||
val Seq(smemReadsPerCycle, smemWritesPerCycle) = outer.smemBankMgrs.transpose.map { rw =>
|
|
||||||
VecInit(rw.map(_.in.head._1.a.fire.asUInt)).reduceTree(_ +& _)
|
|
||||||
}
|
|
||||||
val smemReadCounter = RegInit(0.U(32.W))
|
|
||||||
val smemWriteCounter = RegInit(0.U(32.W))
|
|
||||||
smemReadCounter := smemReadCounter +& smemReadsPerCycle
|
|
||||||
smemWriteCounter := smemWriteCounter +& smemWritesPerCycle
|
|
||||||
dontTouch(smemReadCounter)
|
|
||||||
dontTouch(smemWriteCounter)
|
|
||||||
|
|
||||||
if (outer.strideByWord) {
|
if (outer.strideByWord) {
|
||||||
val uniformFires = Seq.fill(2)(VecInit.fill(outer.smemBanks)(VecInit.fill(outer.smemSubbanks)(false.B)))
|
val uniformFires = Seq.fill(2)(VecInit.fill(outer.smemBanks)(VecInit.fill(outer.smemSubbanks)(false.B)))
|
||||||
|
|
||||||
|
// instantiate sram banks and connect
|
||||||
outer.smemBankMgrs.grouped(outer.smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
outer.smemBankMgrs.grouped(outer.smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
||||||
|
|
||||||
|
bankMgrs.zipWithIndex.foreach { case (ports, wid) =>
|
||||||
|
val readPorts = ports.init
|
||||||
|
val writePort = ports.last
|
||||||
|
|
||||||
|
assert(!readPorts.flatMap(_.portParams.map(_.anySupportPutFull)).reduce(_ || _))
|
||||||
|
assert(!writePort.portParams.map(_.anySupportGet).reduce(_ || _))
|
||||||
|
|
||||||
|
val memDepth = outer.smemDepth
|
||||||
|
val memWidth = outer.smemWidth
|
||||||
|
val wordWidth = outer.wordSize
|
||||||
|
|
||||||
|
outer.smemKey.memType match {
|
||||||
|
case TwoPort =>
|
||||||
|
val mem = TwoPortSyncMem(
|
||||||
|
n = memDepth,
|
||||||
|
t = UInt((wordWidth * 8).W),
|
||||||
|
)
|
||||||
|
// TODO: bring in cluster id
|
||||||
|
// mem.suggestName(s"rad_smem_cl${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||||
|
|
||||||
|
val (rNode, rEdge) = readPorts.head.in.head
|
||||||
|
val (wNode, wEdge) = writePort.in.head
|
||||||
|
|
||||||
|
// address format is
|
||||||
|
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
|
||||||
|
// line_id is used to index into the SRAMs
|
||||||
|
mem.io.raddr := (rNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U
|
||||||
|
mem.io.waddr := RegNext((wNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U)
|
||||||
|
|
||||||
|
assert((bid.U === ((rNode.a.bits.address & (memDepth * memWidth * outer.smemBanks - 1).U) >>
|
||||||
|
log2Ceil(memDepth * memWidth).U).asUInt) || !rNode.a.valid, "bank id mismatch with request")
|
||||||
|
assert((wid.U === ((rNode.a.bits.address & (memWidth - 1).U) >>
|
||||||
|
log2Ceil(wordWidth).U).asUInt) || !rNode.a.valid, "word id mismatch with request")
|
||||||
|
|
||||||
|
makeReadBuffer(ReadPort(mem.io.ren, mem.io.rdata), rNode, rEdge)
|
||||||
|
makeWriteBuffer(WritePort(mem.io.wen, mem.io.wdata, mem.io.mask), wNode, wEdge)
|
||||||
|
|
||||||
|
case TwoReadOneWrite =>
|
||||||
|
val mem = TwoReadOneWriteSyncMem(
|
||||||
|
n = memDepth,
|
||||||
|
t = UInt((wordWidth * 8).W),
|
||||||
|
)
|
||||||
|
|
||||||
|
val (rNode0, rEdge0) = readPorts.head.in.head
|
||||||
|
val (rNode1, rEdge1) = readPorts.last.in.head
|
||||||
|
val (wNode, wEdge) = writePort.in.head
|
||||||
|
|
||||||
|
mem.io.raddr0 := (rNode0.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U
|
||||||
|
mem.io.raddr1 := (rNode1.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U
|
||||||
|
mem.io.waddr := RegNext((wNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U)
|
||||||
|
|
||||||
|
makeReadBuffer(ReadPort(mem.io.ren0, mem.io.rdata0), rNode0, rEdge0)
|
||||||
|
makeReadBuffer(ReadPort(mem.io.ren1, mem.io.rdata1), rNode1, rEdge1)
|
||||||
|
makeWriteBuffer(WritePort(mem.io.wen, mem.io.wdata, mem.io.mask), wNode, wEdge)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// set up uniform mux selects
|
||||||
|
Seq.tabulate(outer.smemBanks) { bid =>
|
||||||
|
// note down fire here so the round-robin knows when an input is selected
|
||||||
|
Seq.tabulate(outer.smemSubbanks) { wid =>
|
||||||
|
(uniformFires zip outer.uniformNodesOut).foreach { case (uf, n) =>
|
||||||
|
uf(bid)(wid) := n(bid)(wid).in.head._1.a.fire
|
||||||
|
}
|
||||||
|
}
|
||||||
// have a uniform hint to all subbanks in a bank
|
// have a uniform hint to all subbanks in a bank
|
||||||
val wordSelects1h = Seq(
|
val wordSelects1h = Seq(
|
||||||
Wire(UInt(outer.uniformNodesIn.head(bid).head.length.W)).suggestName(s"ws_r_b${bid}"),
|
Wire(UInt(outer.uniformNodesIn.head(bid).head.length.W)).suggestName(s"ws_r_b${bid}"),
|
||||||
@@ -275,43 +362,6 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
VecInit(wordsInIdx.toSeq).asUInt.orR
|
VecInit(wordsInIdx.toSeq).asUInt.orR
|
||||||
}.toSeq).asUInt.suggestName(s"valid_sources_rw${rw}_b${bid}")
|
}.toSeq).asUInt.suggestName(s"valid_sources_rw${rw}_b${bid}")
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(bankMgrs.flatten.size == 2/* read and write */ * outer.smemSubbanks)
|
|
||||||
bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
|
||||||
assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
|
|
||||||
assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))
|
|
||||||
|
|
||||||
val memDepth = outer.smemDepth
|
|
||||||
val memWidth = outer.smemWidth
|
|
||||||
val wordWidth = outer.wordSize
|
|
||||||
|
|
||||||
val mem = TwoPortSyncMem(
|
|
||||||
n = memDepth,
|
|
||||||
t = UInt((wordWidth * 8).W),
|
|
||||||
)
|
|
||||||
// TODO: bring in cluster id
|
|
||||||
// mem.suggestName(s"rad_smem_cl${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
|
||||||
|
|
||||||
val (rNode, rEdge) = r.in.head
|
|
||||||
val (wNode, wEdge) = w.in.head
|
|
||||||
|
|
||||||
// address format is
|
|
||||||
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
|
|
||||||
// line_id is used to index into the SRAMs
|
|
||||||
mem.io.raddr := (rNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U
|
|
||||||
mem.io.waddr := RegNext((wNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U)
|
|
||||||
|
|
||||||
assert((bid.U === ((rNode.a.bits.address & (memDepth * memWidth * outer.smemBanks - 1).U) >>
|
|
||||||
log2Ceil(memDepth * memWidth).U).asUInt) || !rNode.a.valid, "bank id mismatch with request")
|
|
||||||
assert((wid.U === ((rNode.a.bits.address & (memWidth - 1).U) >>
|
|
||||||
log2Ceil(wordWidth).U).asUInt) || !rNode.a.valid, "word id mismatch with request")
|
|
||||||
|
|
||||||
makeBuffer(mem, rNode, rEdge, wNode, wEdge)
|
|
||||||
|
|
||||||
(uniformFires zip outer.uniformNodesOut).foreach { case (uf, n) =>
|
|
||||||
uf(bid)(wid) := n(bid)(wid).in.head._1.a.fire
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// use round robin to decide uniform select
|
// use round robin to decide uniform select
|
||||||
(wordSelects1h zip Seq(validRSources, validWSources)).zipWithIndex.foreach { case ((ws, vs), rw) =>
|
(wordSelects1h zip Seq(validRSources, validWSources)).zipWithIndex.foreach { case ((ws, vs), rw) =>
|
||||||
ws := TLArbiter.roundRobin(vs.getWidth, vs, uniformFires(rw)(bid).asUInt.orR)
|
ws := TLArbiter.roundRobin(vs.getWidth, vs, uniformFires(rw)(bid).asUInt.orR)
|
||||||
@@ -331,7 +381,7 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// set policy to use the uniform select as hint
|
||||||
(outer.uniformPolicyNodes zip wordSelects1h).zipWithIndex.foreach { case ((nodesBw, ws), rw) =>
|
(outer.uniformPolicyNodes zip wordSelects1h).zipWithIndex.foreach { case ((nodesBw, ws), rw) =>
|
||||||
nodesBw(bid).foreach { policy =>
|
nodesBw(bid).foreach { policy =>
|
||||||
policy.out.head._1.hint := ws
|
policy.out.head._1.hint := ws
|
||||||
@@ -355,8 +405,20 @@ class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedM
|
|||||||
mem.io.raddr := (rNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U
|
mem.io.raddr := (rNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U
|
||||||
mem.io.waddr := RegNext((wNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U)
|
mem.io.waddr := RegNext((wNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U)
|
||||||
|
|
||||||
makeBuffer(mem, rNode, rEdge, wNode, wEdge)
|
makeReadBuffer(ReadPort(mem.io.ren, mem.io.rdata), rNode, rEdge)
|
||||||
|
makeWriteBuffer(WritePort(mem.io.wen, mem.io.wdata, mem.io.mask), wNode, wEdge)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// read/write access counter for smem banks
|
||||||
|
val smemAccessesPerCycle = outer.smemBankMgrs.transpose.map { rw =>
|
||||||
|
VecInit(rw.map(_.in.head._1.a.fire.asUInt)).reduceTree(_ +& _)
|
||||||
|
}
|
||||||
|
val smemReadCounter = RegInit(0.U(32.W))
|
||||||
|
val smemWriteCounter = RegInit(0.U(32.W))
|
||||||
|
smemReadCounter := smemReadCounter +& smemAccessesPerCycle.init.reduce(_ +& _)
|
||||||
|
smemWriteCounter := smemWriteCounter +& smemAccessesPerCycle.last
|
||||||
|
dontTouch(smemReadCounter)
|
||||||
|
dontTouch(smemWriteCounter)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user