word strided subbanks, parallel subbank access for gemmini and all-to-all xbar parallel access for radiance smem
This commit is contained in:
144
src/main/scala/radiance/memory/DistributorNode.scala
Normal file
144
src/main/scala/radiance/memory/DistributorNode.scala
Normal file
@@ -0,0 +1,144 @@
|
||||
package radiance.memory
|
||||
|
||||
import chisel3._
|
||||
import chisel3.experimental.SourceInfo
|
||||
import chisel3.util._
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util.BundleField
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
|
||||
|
||||
/** Diplomatic node that fans a single `from`-byte-wide TileLink client out into
  * `from / to` clients that are each `to` bytes wide.
  *
  * Every request accepted on the inward edge is replicated onto all outward edges:
  * outward edge `i` carries byte lanes `[i * to, (i + 1) * to)` of the data and mask,
  * at address `base + i * to`, with the edge index prepended to the source id.
  * Responses are gathered (possibly over several cycles) and returned upstream as a
  * single `from`-byte response.
  *
  * @param from width in bytes of the single inward (upstream) client; power of two
  * @param to   width in bytes of each outward (downstream) client; power of two, `<= from`
  */
class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyModule {
  require(isPow2(from) && isPow2(to) && (from >= to), "invalid distributor node parameters")
  println(s"distributor node to segment from $from into $to")

  /** Number of outward edges, i.e. the number of `to`-byte slices in one request. */
  val num_clients = from / to

  val node = TLNexusNode(clientFn = seq => {
    require(seq.map(_.masters.size).sum == 1, s"there should only be one client to a distributor node, found ${seq.map(_.masters.size).sum}")
    val master = seq.head.masters.head
    require(isPow2(master.sourceId.size))
    seq.head.v1copy(
      // One downstream client per slice; each gets its own, disjoint source id range.
      clients = Seq.tabulate(num_clients)(i => master.v2copy(
        name = s"${name}_dist_client_$i",
        emits = TLMasterToSlaveTransferSizes(
          get = TransferSizes(to, to),
          putFull = TransferSizes(to, to),
          putPartial = TransferSizes(to, to)
        ),
        sourceId = master.sourceId.shift(master.sourceId.size * i)
      ))
    )
  }, managerFn = seq => {
    seq.head.v1copy(
      responseFields = BundleField.union(seq.flatMap(_.responseFields)),
      requestKeys = seq.flatMap(_.requestKeys).distinct,
      minLatency = seq.map(_.minLatency).min,
      endSinkId = TLXbar.mapOutputIds(seq).map(_.end).max,
      // Upstream sees a single manager that only accepts full `from`-byte accesses.
      managers = Seq(TLSlaveParameters.v2(
        name = Some(s"${name}_manager"),
        address = AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address))),
        supports = TLMasterToSlaveTransferSizes(
          get = TransferSizes(from, from),
          putFull = TransferSizes(from, from),
          putPartial = TransferSizes(from, from)
        ),
        fifoId = Some(0)
      )),
      beatBytes = from
    )
  })

  lazy val module = new LazyModuleImp(this) {
    val cn = node.in.head._1      // the single upstream (wide) bundle
    val mn = node.out.map(_._1)   // the downstream (narrow) bundles, one per slice
    println(f"$name node in size ${node.in.size}, out size ${node.out.size}")
    assert(node.out.size == num_clients, s"got ${node.out.size} clients instead of $num_clients")

    // A channel: replicate the request onto every slice.
    val ca = cn.a.bits
    mn.map(_.a.bits).zipWithIndex.foreach { case (m, i) =>
      println(s"$i master source id width ${m.source.getWidth}, client source id width ${ca.source.getWidth}")
      m.opcode := ca.opcode
      m.param := ca.param
      m.user := ca.user
      // NOTE(review): this lands in the i-th shifted source range only if the upstream
      // source range starts at 0 and its bit width equals log2(size) -- TODO confirm.
      m.source := Cat(i.U(log2Ceil(num_clients).W), ca.source)
      m.address := ca.address + (to * i).U
      m.mask := ca.mask((i + 1) * to - 1, i * to)
      m.data := ca.data((i + 1) * to * 8 - 1, i * to * 8)
      m.size := log2Ceil(to).U
    }
    // NOTE(review): valid is broadcast unconditionally while upstream ready is the AND of
    // all downstream readies. If only some slices are ready, those slices fire while the
    // upstream request does not, and would see the same request again next cycle --
    // presumably the downstream ports are expected to be ready in lockstep; verify.
    mn.map(_.a.valid).foreach(_ := cn.a.valid)
    cn.a.ready := mn.map(_.a.ready).reduce(_ && _)

    // D channel: gather one response per slice and return them as one wide response.
    val cd = cn.d.bits
    cd.size := log2Ceil(from).U
    val partialWait = RegInit(false.B)               // some, but not all, slices have responded
    val arrived = RegInit(0.U(num_clients.W))        // per-slice "response already captured" bits
    val cdReg = RegInit(0.U.asTypeOf(cd.cloneType))  // holding register for data/metadata seen so far

    /** Copies every D-channel field except `data` and `size` from `src` into `dst`,
      * truncating the source id to `dst`'s width (this drops the slice-index prefix
      * when `src` is a downstream response).
      */
    def setMetadata(dst: TLBundleD, src: TLBundleD): Unit = {
      dst.opcode := src.opcode
      dst.user := src.user
      dst.param := src.param
      dst.sink := src.sink
      dst.denied := src.denied
      dst.corrupt := src.corrupt
      dst.source := src.source(dst.source.getWidth - 1, 0)
    }

    // Data of the slices firing this cycle, slice 0 in the low bits; slices that are not
    // firing contribute zeros so that partial results can simply be OR-ed together.
    def partialData: UInt = VecInit(mn.map(_.d).map(d => Mux(d.fire, d.bits.data, 0.U(d.bits.data.getWidth.W)))).asUInt
    // One bit per slice: set when that slice's response fires this cycle.
    def partialValid: UInt = VecInit(mn.map(_.d.fire)).asUInt

    mn.map(_.d.ready).zip(arrived.asBools).foreach { case (r, a) =>
      r := cn.d.ready && (!partialWait || !a) // if waiting for partial response, ready only if not arrived yet
    }

    // TODO: might need coverage test for this
    when (!partialWait) {
      cn.d.valid := false.B
      partialWait := false.B
      when (partialValid.asBools.reduce(_ && _)) {
        // all valids, immediately return both metadata and data
        cn.d.valid := true.B
        cd.data := Cat(mn.map(_.d.bits.data).reverse)
        setMetadata(cd, mn.head.d.bits)
        assert(cd.data === partialData, "sanity check")
      }.elsewhen (partialValid.asBools.reduce(_ || _)) {
        // at least 1 valid: enter partial valid state, store partial data into regs
        partialWait := true.B
        arrived := partialValid
        cdReg.data := partialData
        // Capture metadata only when slice 0 is actually accepted this cycle.
        when (mn.head.d.fire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }.otherwise {
      cn.d.valid := false.B
      partialWait := true.B
      when ((arrived | partialValid).asBools.reduce(_ && _)) {
        // all valids received now
        when (mn.head.d.fire) {
          // slice 0 completes this cycle: its live bits carry the metadata
          setMetadata(cd, mn.head.d.bits)
        }.otherwise {
          // Slice 0 was captured earlier. Its port may already present a *later* response
          // (held off by ready), so use the stored metadata. Copy metadata fields only:
          // a bulk `cd := cdReg` would also overwrite cd.size with the never-written
          // (zero) size field of the holding register.
          setMetadata(cd, cdReg)
        }
        cn.d.valid := true.B
        cd.data := cdReg.data | partialData
        partialWait := false.B
        cdReg := 0.U.asTypeOf(cdReg.cloneType)
        arrived := 0.U
      }.elsewhen (partialValid.asBools.reduce(_ || _)) {
        // update partial data
        arrived := arrived | partialValid
        cdReg.data := cdReg.data | partialData
        // Gate on fire, not valid: once slice 0 has arrived its ready is low, and a
        // pending later response must not replace the stored metadata.
        when (mn.head.d.fire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }
  }
}
|
||||
|
||||
/** Companion factory: builds a [[DistributorNode]] lazy module and hands back only its
  * diplomatic node, so it can be used inline in a `:=` connection chain.
  */
object DistributorNode {
  /** @param from width in bytes of the upstream client
    * @param to   width in bytes of each downstream client
    * @return the `TLNexusNode` of a freshly created distributor
    */
  def apply(from: Int, to: Int)(implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = {
    val distributor = LazyModule(new DistributorNode(from, to))
    distributor.node
  }
}
|
||||
@@ -27,7 +27,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
||||
require(isPow2(vis_mask + 1) || vis_mask == -1)
|
||||
println(f"combined visibilities of splitter memory node clients: ${vis_min}, ${vis_mask}")
|
||||
|
||||
seq(0).v1copy(
|
||||
seq.head.v1copy(
|
||||
echoFields = BundleField.union(seq.flatMap(_.echoFields)),
|
||||
requestFields = BundleField.union(seq.flatMap(_.requestFields)),
|
||||
responseKeys = seq.flatMap(_.responseKeys).distinct,
|
||||
@@ -56,9 +56,8 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
||||
)
|
||||
},
|
||||
managerFn = { seq =>
|
||||
println(seq.flatMap(_.slaves.map(_.supports)))
|
||||
// val fifoIdFactory = TLXbar.relabeler()
|
||||
seq(0).v1copy(
|
||||
seq.head.v1copy(
|
||||
responseFields = BundleField.union(seq.flatMap(_.responseFields)),
|
||||
requestKeys = seq.flatMap(_.requestKeys).distinct,
|
||||
minLatency = seq.map(_.minLatency).min,
|
||||
@@ -81,7 +80,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
||||
val u_out = node.out
|
||||
val u_in = node.in
|
||||
assert(u_out.length == 2)
|
||||
println(f"gemmini unified memory node has ${u_in.length} incoming client(s)")
|
||||
println(f"${name} has ${u_in.length} incoming client(s)")
|
||||
|
||||
val r_out = u_out.head
|
||||
val w_out = u_out.last
|
||||
|
||||
@@ -7,13 +7,11 @@ import chisel3._
|
||||
import chisel3.util._
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import freechips.rocketchip.prci.ClockSinkParameters
|
||||
import freechips.rocketchip.regmapper.RegField
|
||||
import freechips.rocketchip.subsystem._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.util.BundleField
|
||||
import gemmini._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import radiance.memory.RWSplitterNode
|
||||
import radiance.memory._
|
||||
|
||||
case class RadianceClusterParams(
|
||||
val clusterId: Int,
|
||||
@@ -69,50 +67,63 @@ class RadianceCluster (
|
||||
// TODO: stride by word
|
||||
val unified_mem_read_node = TLIdentityNode()
|
||||
val unified_mem_write_node = TLIdentityNode()
|
||||
|
||||
val spad_data_len = gemminiConfig.sp_width / 8
|
||||
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
|
||||
val max_data_len = spad_data_len // max acc_data_len
|
||||
|
||||
val smem_base = gemminiConfig.tl_ext_mem_base
|
||||
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / max_data_len
|
||||
val smem_width = max_data_len
|
||||
val smem_width = spad_data_len
|
||||
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / smem_width
|
||||
val smem_banks = gemminiConfig.sp_banks
|
||||
val smem_subbanks = 1
|
||||
val smem_subbanks = smem_width / wordSize
|
||||
val smem_size = smem_width * smem_depth * smem_banks
|
||||
|
||||
val splitter_node = RWSplitterNode()
|
||||
val stride_by_word = true
|
||||
|
||||
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
||||
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
||||
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
|
||||
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
|
||||
val radiance_smem_fanout = radianceTiles.flatMap {
|
||||
_.smemNodes.map { m =>
|
||||
val smem_fanout_xbar = TLXbar()
|
||||
smem_fanout_xbar :=* m
|
||||
smem_fanout_xbar
|
||||
}
|
||||
}
|
||||
|
||||
// assert(splitter_node.in.map(_._2.slave.slaves.flatMap(_.supports.get)))
|
||||
|
||||
/* address = Seq(AddressSet(gemmini.spad_base, smem_depth * smem_width * smem_banks - 1)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
get = TransferSizes(1, smem_width),
|
||||
putFull = TransferSizes(1, smem_width),
|
||||
putPartial = TransferSizes(1, smem_width)),*/
|
||||
|
||||
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||
|
||||
val stride_by_word = false
|
||||
require(isPow2(smem_banks))
|
||||
// collection of read and write managers for each sram (sub)bank
|
||||
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
|
||||
assert(false, "TODO under construction")
|
||||
// assert((config.sp_capacity match { case CapacityInKilobytes(kb) => kb * 1024}) ==
|
||||
// gemmini.config.sp_bank_entries * spad_data_len / max_data_len * gemmini.config.sp_banks * max_data_len)
|
||||
(0 until gemminiConfig.sp_banks).map { bank =>
|
||||
LazyModule(new TLRAM(
|
||||
address = AddressSet(max_data_len * bank,
|
||||
((gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - 1) * gemminiConfig.sp_banks + bank)
|
||||
* max_data_len + (max_data_len - 1)),
|
||||
beatBytes = max_data_len
|
||||
))
|
||||
}.map(x => Seq(x.node))
|
||||
require(isPow2(smem_subbanks))
|
||||
(0 until smem_banks).flatMap { bid =>
|
||||
(0 until smem_subbanks).map { wid =>
|
||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
|
||||
address = Seq(AddressSet(
|
||||
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||
smem_depth * smem_width - smem_width + wordSize - 1
|
||||
)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
get = TransferSizes(wordSize, wordSize)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = wordSize
|
||||
))
|
||||
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
|
||||
address = Seq(AddressSet(
|
||||
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||
smem_depth * smem_width - smem_width + wordSize - 1
|
||||
)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
putFull = TransferSizes(wordSize, wordSize),
|
||||
putPartial = TransferSizes(wordSize, wordSize)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = wordSize
|
||||
))))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
require(isPow2(smem_banks))
|
||||
(0 until smem_banks).map { bank =>
|
||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
@@ -124,39 +135,131 @@ class RadianceCluster (
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = smem_width
|
||||
))),
|
||||
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bank}_write_mgr"),
|
||||
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
||||
smem_depth * smem_width - 1)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
putFull = TransferSizes(1, smem_width),
|
||||
putPartial = TransferSizes(1, smem_width)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = smem_width
|
||||
))))
|
||||
))
|
||||
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bank}_write_mgr"),
|
||||
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
||||
smem_depth * smem_width - 1)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
putFull = TransferSizes(1, smem_width),
|
||||
putPartial = TransferSizes(1, smem_width)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = smem_width
|
||||
))))
|
||||
}
|
||||
}
|
||||
|
||||
val smem_r_xbar = TLXbar()
|
||||
val smem_w_xbar = TLXbar()
|
||||
smem_r_xbar :=* unified_mem_read_node
|
||||
smem_w_xbar :=* unified_mem_write_node
|
||||
if (stride_by_word) {
|
||||
val spad_read_nodes = Seq.fill(smem_banks) {
|
||||
val r_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
r_dist := gemmini.spad_read_nodes
|
||||
Seq.fill(smem_subbanks) {
|
||||
val id_node = TLIdentityNode()
|
||||
id_node := r_dist
|
||||
id_node
|
||||
}
|
||||
}
|
||||
val spad_write_nodes = Seq.fill(smem_banks) {
|
||||
val w_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
w_dist := gemmini.spad_write_nodes
|
||||
Seq.fill(smem_subbanks) {
|
||||
val id_node = TLIdentityNode()
|
||||
id_node := w_dist
|
||||
id_node
|
||||
}
|
||||
}
|
||||
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
ws_dist := gemmini.spad.spad_writer.node // this is the dma write node
|
||||
val spad_sp_write_nodes = Seq.fill(smem_subbanks) {
|
||||
val ws_xbar = TLXbar() // fanout to 4 banks
|
||||
ws_xbar := ws_dist
|
||||
ws_xbar
|
||||
}
|
||||
|
||||
smem_bank_mgrs.foreach { mem =>
|
||||
require(mem.length == 2)
|
||||
mem.head := smem_r_xbar
|
||||
mem.last := TLFragmenter(spad_data_len, max_write_width_bytes) := smem_w_xbar
|
||||
// spad_read_nodes.flatten.foreach(node => unified_mem_read_node :=* node)
|
||||
// spad_write_nodes.flatten.foreach(node => unified_mem_write_node :=* node)
|
||||
// spad_sp_write_nodes.foreach(node => unified_mem_write_node :=* node)
|
||||
// unified_mem_write_node :=* DistributorNode(from = smem_width, to = wordSize) :=* gemmini.spad.spad_writer.node // this is the dma write node
|
||||
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
|
||||
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
|
||||
|
||||
// these nodes access an entire line simultaneously
|
||||
val uniform_r_nodes: Seq[Seq[Seq[TLNode]]] = spad_read_nodes.map { rb =>
|
||||
rb.map { rw => Seq(rw) }
|
||||
}
|
||||
val uniform_w_nodes: Seq[Seq[Seq[TLNode]]] = spad_write_nodes.map { wb =>
|
||||
(wb zip spad_sp_write_nodes).map { case (ww, sw) => Seq(ww, sw) }
|
||||
}
|
||||
|
||||
val splitter_nodes = radiance_smem_fanout.map { m =>
|
||||
val splitter_node = RWSplitterNode()
|
||||
splitter_node := m
|
||||
splitter_node
|
||||
}
|
||||
|
||||
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||
|
||||
// these nodes are random access
|
||||
val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map { s =>
|
||||
val nu_r_xbar = TLXbar()
|
||||
nu_r_xbar := s
|
||||
nu_r_xbar
|
||||
}.toSeq
|
||||
val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map { s =>
|
||||
val nu_w_xbar = TLXbar()
|
||||
nu_w_xbar := s
|
||||
nu_w_xbar
|
||||
}.toSeq
|
||||
|
||||
smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
// TODO: this should be a coordinated round robin
|
||||
val subbank_r_xbar = TLXbar(TLArbiter.lowestIndexFirst)
|
||||
val subbank_w_xbar = TLXbar(TLArbiter.lowestIndexFirst)
|
||||
r := subbank_r_xbar
|
||||
w := subbank_w_xbar
|
||||
uniform_r_nodes(bid)(wid).foreach( subbank_r_xbar := _ )
|
||||
uniform_w_nodes(bid)(wid).foreach( subbank_w_xbar := _ )
|
||||
|
||||
nonuniform_r_nodes.foreach( subbank_r_xbar := _ )
|
||||
nonuniform_w_nodes.foreach( subbank_w_xbar := _ )
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
||||
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
||||
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||
|
||||
val splitter_node = RWSplitterNode()
|
||||
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||
|
||||
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||
|
||||
val smem_r_xbar = TLXbar()
|
||||
val smem_w_xbar = TLXbar()
|
||||
DisableMonitors { implicit p =>
|
||||
smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node
|
||||
smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node
|
||||
}
|
||||
|
||||
smem_bank_mgrs.foreach { mem =>
|
||||
require(mem.length == 2)
|
||||
mem.head := smem_r_xbar
|
||||
mem.last := smem_w_xbar
|
||||
}
|
||||
}
|
||||
|
||||
// connect tile smem nodes to xbar, and xbar to banks
|
||||
// val smem_xbar = TLXbar()
|
||||
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||
gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||
// printf and perf counter buffer FIXME: make configurable
|
||||
TLRAM(AddressSet(x"ff004000", numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
|
||||
|
||||
assert(smem_size == 0x4000, "fix me")
|
||||
// printf and perf counter buffer
|
||||
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
|
||||
|
||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
||||
@@ -174,7 +277,6 @@ class RadianceCluster (
|
||||
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
||||
// case (xbar, node) => xbar.node := node
|
||||
// }
|
||||
tile.smemNodes.foreach(clbus.inwardNode := _)
|
||||
barrierSlaveNode := tile.barrierMasterNode
|
||||
}
|
||||
// perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
||||
@@ -212,23 +314,10 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
}
|
||||
|
||||
// TODO: remove Pipeline dependency of gemmini
|
||||
def makeSmemBanks: Unit = {
|
||||
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
||||
val mem_depth = outer.smem_depth
|
||||
val mem_width = outer.smem_width
|
||||
|
||||
val mem = TwoPortSyncMem(
|
||||
n = mem_depth,
|
||||
t = UInt((mem_width * 8).W),
|
||||
mask_len = mem_width // byte level mask
|
||||
)
|
||||
|
||||
val (r_node, r_edge) = r.in.head
|
||||
val (w_node, w_edge) = w.in.head
|
||||
|
||||
// READ
|
||||
def makeSmemBanks(): Unit = {
|
||||
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,
|
||||
w_node: TLBundle, w_edge: TLEdgeIn): Unit = {
|
||||
mem.io.ren := r_node.a.fire
|
||||
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||
|
||||
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
||||
data_pipe_in.valid := RegNext(mem.io.ren)
|
||||
@@ -274,7 +363,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
r_node.d.bits := r_edge.AccessAck(
|
||||
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
|
||||
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
|
||||
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits))
|
||||
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt)
|
||||
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
|
||||
// r node A is not ready only if D is not ready and both slots filled
|
||||
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
|
||||
@@ -283,16 +372,71 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
|
||||
// WRITE
|
||||
mem.io.wen := w_node.a.fire
|
||||
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||
mem.io.wdata := w_node.a.bits.data
|
||||
mem.io.mask := w_node.a.bits.mask.asBools
|
||||
w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
|
||||
w_node.d.valid := w_node.a.valid
|
||||
w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
|
||||
}
|
||||
|
||||
if (outer.stride_by_word) {
|
||||
outer.smem_bank_mgrs.grouped(outer.smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||
assert(bank_mgrs.flatten.size == 2 * outer.smem_subbanks)
|
||||
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
|
||||
assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))
|
||||
|
||||
val mem_depth = outer.smem_depth
|
||||
val mem_width = outer.smem_width
|
||||
val word_width = outer.wordSize
|
||||
|
||||
val mem = TwoPortSyncMem(
|
||||
n = mem_depth,
|
||||
t = UInt((word_width * 8).W),
|
||||
mask_len = word_width // byte level mask
|
||||
)
|
||||
mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||
|
||||
val (r_node, r_edge) = r.in.head
|
||||
val (w_node, w_edge) = w.in.head
|
||||
|
||||
// address format is
|
||||
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
|
||||
// line_id is used to index into the SRAMs
|
||||
mem.io.raddr := (r_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
|
||||
mem.io.waddr := (w_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
|
||||
|
||||
assert((bid.U === ((r_node.a.bits.address & (mem_depth * mem_width * outer.smem_banks - 1).U) >>
|
||||
log2Ceil(mem_depth * mem_width).U).asUInt) || !r_node.a.valid, "bank id mismatch with request")
|
||||
assert((wid.U === ((r_node.a.bits.address & (mem_width - 1).U) >>
|
||||
log2Ceil(word_width).U).asUInt) || !r_node.a.valid, "word id mismatch with request")
|
||||
|
||||
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
||||
val mem_depth = outer.smem_depth
|
||||
val mem_width = outer.smem_width
|
||||
|
||||
val mem = TwoPortSyncMem(
|
||||
n = mem_depth,
|
||||
t = UInt((mem_width * 8).W),
|
||||
mask_len = mem_width // byte level mask
|
||||
)
|
||||
|
||||
val (r_node, r_edge) = r.in.head
|
||||
val (w_node, w_edge) = w.in.head
|
||||
|
||||
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||
|
||||
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
makeSmemBanks
|
||||
makeSmemBanks()
|
||||
|
||||
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user