word strided subbanks, parallel subbank access for gemmini and all-to-all xbar parallel access for radiance smem
This commit is contained in:
144
src/main/scala/radiance/memory/DistributorNode.scala
Normal file
144
src/main/scala/radiance/memory/DistributorNode.scala
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package radiance.memory
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.experimental.SourceInfo
|
||||||
|
import chisel3.util._
|
||||||
|
import freechips.rocketchip.diplomacy._
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.util.BundleField
|
||||||
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
|
||||||
|
|
||||||
|
/**
  * Diplomatic adapter that fans one `from`-byte-wide TileLink client out into
  * `from / to` clients that are each `to` bytes wide.
  *
  * Upstream, the node advertises a single manager with `beatBytes = from` that
  * only supports transfers of exactly `from` bytes. Downstream, it presents
  * `from / to` clients that only emit transfers of exactly `to` bytes.
  *
  * Every A beat is cut into byte-lane slices and issued to all outputs in the
  * same cycle. The D responses are merged back into one `from`-byte response;
  * slices that return early are buffered until the last one arrives.
  *
  * @param from width in bytes of the (single) incoming client; power of two
  * @param to   width in bytes of each outgoing client; power of two, `<= from`
  */
class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyModule {
  require(isPow2(from) && isPow2(to) && (from >= to), "invalid distributor node parameters")
  println(s"distributor node to segment from $from into $to")
  val num_clients = from / to

  val node = TLNexusNode(clientFn = seq => {
    // Exactly one upstream master is replicated into `num_clients` narrower masters.
    require(seq.map(_.masters.size).sum == 1, s"there should only be one client to a distributor node, found ${seq.map(_.masters.size).sum}")
    val master = seq.head.masters.head
    require(isPow2(master.sourceId.size))
    seq.head.v1copy(
      clients = Seq.tabulate(num_clients)(i => master.v2copy(
        name = s"${name}_dist_client_$i",
        emits = TLMasterToSlaveTransferSizes(
          get = TransferSizes(to, to),
          putFull = TransferSizes(to, to),
          putPartial = TransferSizes(to, to)
        ),
        // Replica i gets its own copy of the source range, offset by i * size.
        // NOTE(review): the hardware below prepends `i` as upper source bits, which
        // only lines up with this range if the upstream range starts at 0 - confirm.
        sourceId = master.sourceId.shift(master.sourceId.size * i)
      ))
    )
  }, managerFn = seq => {
    // All downstream managers are merged into one `from`-byte-wide manager.
    seq.head.v1copy(
      responseFields = BundleField.union(seq.flatMap(_.responseFields)),
      requestKeys = seq.flatMap(_.requestKeys).distinct,
      minLatency = seq.map(_.minLatency).min,
      endSinkId = TLXbar.mapOutputIds(seq).map(_.end).max,
      managers = Seq(TLSlaveParameters.v2(
        name = Some(s"${name}_manager"),
        address = AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address))),
        supports = TLMasterToSlaveTransferSizes(
          get = TransferSizes(from, from),
          putFull = TransferSizes(from, from),
          putPartial = TransferSizes(from, from)
        ),
        fifoId = Some(0),
      )),
      beatBytes = from
    )
  })

  lazy val module = new LazyModuleImp(this) {
    val cn = node.in.head._1
    val mn = node.out.map(_._1)
    println(f"$name node in size ${node.in.size}, out size ${node.out.size}")
    assert(node.out.size == num_clients, s"got ${node.out.size} clients instead of $num_clients")

    // A channel: broadcast the request, giving output i the i-th `to`-byte slice.
    val ca = cn.a.bits
    mn.map(_.a.bits).zipWithIndex.foreach { case (m, i) =>
      println(s"$i master source id width ${m.source.getWidth}, client source id width ${ca.source.getWidth}")
      m.opcode := ca.opcode
      m.param := ca.param
      m.user := ca.user
      m.source := Cat(i.U(log2Ceil(num_clients).W), ca.source)
      m.address := ca.address + (to * i).U
      m.mask := ca.mask((i + 1) * to - 1, i * to)
      m.data := ca.data((i + 1) * to * 8 - 1, i * to * 8)
      m.size := log2Ceil(to).U
      // Forward the corrupt flag so that every slice carries it downstream.
      m.corrupt := ca.corrupt
    }
    // The request is presented to every output at once and is only accepted
    // upstream when all outputs are ready.
    mn.map(_.a.valid).foreach(_ := cn.a.valid)
    cn.a.ready := mn.map(_.a.ready).reduce(_ && _)

    // D channel: merge the per-slice responses into a single wide response.
    val cd = cn.d.bits
    val partialWait = RegInit(false.B)          // true while some slices are still outstanding
    val arrived = RegInit(0.U(num_clients.W))   // bit i set once slice i has been captured
    val cdReg = RegInit(0.U.asTypeOf(cd.cloneType)) // buffered data/metadata of early slices

    // Copies everything except `data` and `size`; the source is truncated to the
    // destination width, dropping the replica index added on the A channel.
    def setMetadata(dst: TLBundleD, src: TLBundleD): Unit = {
      dst.opcode := src.opcode
      dst.user := src.user
      dst.param := src.param
      dst.sink := src.sink
      dst.denied := src.denied
      dst.corrupt := src.corrupt
      dst.source := src.source(dst.source.getWidth - 1, 0)
    }

    // Data and valid bits of the slices that fire in this cycle (zero elsewhere).
    // Computed once so the same logic is not re-elaborated at every use.
    val partialData: UInt = VecInit(mn.map(_.d).map(d => Mux(d.fire, d.bits.data, 0.U(d.bits.data.getWidth.W)))).asUInt
    val partialValid: UInt = VecInit(mn.map(_.d.fire)).asUInt
    // Metadata is taken from output 0, but only in the cycle its response is
    // actually accepted. Using `valid` here would let a *later* response that is
    // waiting behind an already-captured slice overwrite the stored metadata.
    val headFire = mn.head.d.fire

    mn.map(_.d.ready).zip(arrived.asBools).foreach { case (r, a) =>
      r := cn.d.ready && (!partialWait || !a) // if waiting for partial response, ready only if not arrived yet
    }

    // NOTE(review): cn.d.valid is derived from d.fire and therefore depends
    // combinationally on cn.d.ready - confirm the upstream does not derive its
    // ready from valid.
    // TODO: might need coverage test for this
    when (!partialWait) {
      cn.d.valid := false.B
      partialWait := false.B
      when (partialValid.andR) {
        // all valids, immediately return both metadata and data
        cn.d.valid := true.B
        cd.data := Cat(mn.map(_.d.bits.data).reverse)
        setMetadata(cd, mn.head.d.bits)
        assert(cd.data === partialData, "sanity check")
      }.elsewhen (partialValid.orR) {
        // at least 1 valid: enter partial valid state, store partial data into regs
        partialWait := true.B
        arrived := partialValid
        cdReg.data := partialData
        when (headFire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }.otherwise {
      cn.d.valid := false.B
      partialWait := true.B
      when ((arrived | partialValid).andR) {
        // all valids received now
        when (headFire) {
          setMetadata(cd, mn.head.d.bits)
        }.otherwise {
          cd := cdReg
        }
        cn.d.valid := true.B
        cd.data := cdReg.data | partialData
        partialWait := false.B
        cdReg := 0.U.asTypeOf(cdReg.cloneType)
        arrived := 0.U
      }.elsewhen (partialValid.orR) {
        // update partial data
        arrived := arrived | partialValid
        cdReg.data := cdReg.data | partialData
        when (headFire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }

    // Connected last on purpose: `cd := cdReg` above is a bulk connect that would
    // otherwise replace the size with cdReg.size, which is never written.
    cd.size := log2Ceil(from).U
  }
}
|
||||||
|
|
||||||
|
/** Factory for [[DistributorNode]] that hands back only the diplomatic node. */
object DistributorNode {
  /**
    * Instantiates a distributor as a `LazyModule` and returns its `TLNexusNode`.
    *
    * @param from width in bytes of the incoming client
    * @param to   width in bytes of each outgoing client
    */
  def apply(from: Int, to: Int)(implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = {
    val distributor = LazyModule(new DistributorNode(from, to))
    distributor.node
  }
}
|
||||||
@@ -27,7 +27,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
|||||||
require(isPow2(vis_mask + 1) || vis_mask == -1)
|
require(isPow2(vis_mask + 1) || vis_mask == -1)
|
||||||
println(f"combined visibilities of splitter memory node clients: ${vis_min}, ${vis_mask}")
|
println(f"combined visibilities of splitter memory node clients: ${vis_min}, ${vis_mask}")
|
||||||
|
|
||||||
seq(0).v1copy(
|
seq.head.v1copy(
|
||||||
echoFields = BundleField.union(seq.flatMap(_.echoFields)),
|
echoFields = BundleField.union(seq.flatMap(_.echoFields)),
|
||||||
requestFields = BundleField.union(seq.flatMap(_.requestFields)),
|
requestFields = BundleField.union(seq.flatMap(_.requestFields)),
|
||||||
responseKeys = seq.flatMap(_.responseKeys).distinct,
|
responseKeys = seq.flatMap(_.responseKeys).distinct,
|
||||||
@@ -56,9 +56,8 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
|||||||
)
|
)
|
||||||
},
|
},
|
||||||
managerFn = { seq =>
|
managerFn = { seq =>
|
||||||
println(seq.flatMap(_.slaves.map(_.supports)))
|
|
||||||
// val fifoIdFactory = TLXbar.relabeler()
|
// val fifoIdFactory = TLXbar.relabeler()
|
||||||
seq(0).v1copy(
|
seq.head.v1copy(
|
||||||
responseFields = BundleField.union(seq.flatMap(_.responseFields)),
|
responseFields = BundleField.union(seq.flatMap(_.responseFields)),
|
||||||
requestKeys = seq.flatMap(_.requestKeys).distinct,
|
requestKeys = seq.flatMap(_.requestKeys).distinct,
|
||||||
minLatency = seq.map(_.minLatency).min,
|
minLatency = seq.map(_.minLatency).min,
|
||||||
@@ -81,7 +80,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
|
|||||||
val u_out = node.out
|
val u_out = node.out
|
||||||
val u_in = node.in
|
val u_in = node.in
|
||||||
assert(u_out.length == 2)
|
assert(u_out.length == 2)
|
||||||
println(f"gemmini unified memory node has ${u_in.length} incoming client(s)")
|
println(f"${name} has ${u_in.length} incoming client(s)")
|
||||||
|
|
||||||
val r_out = u_out.head
|
val r_out = u_out.head
|
||||||
val w_out = u_out.last
|
val w_out = u_out.last
|
||||||
|
|||||||
@@ -7,13 +7,11 @@ import chisel3._
|
|||||||
import chisel3.util._
|
import chisel3.util._
|
||||||
import freechips.rocketchip.diplomacy._
|
import freechips.rocketchip.diplomacy._
|
||||||
import freechips.rocketchip.prci.ClockSinkParameters
|
import freechips.rocketchip.prci.ClockSinkParameters
|
||||||
import freechips.rocketchip.regmapper.RegField
|
|
||||||
import freechips.rocketchip.subsystem._
|
import freechips.rocketchip.subsystem._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.util.BundleField
|
|
||||||
import gemmini._
|
import gemmini._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import radiance.memory.RWSplitterNode
|
import radiance.memory._
|
||||||
|
|
||||||
case class RadianceClusterParams(
|
case class RadianceClusterParams(
|
||||||
val clusterId: Int,
|
val clusterId: Int,
|
||||||
@@ -69,50 +67,63 @@ class RadianceCluster (
|
|||||||
// TODO: stride by word
|
// TODO: stride by word
|
||||||
val unified_mem_read_node = TLIdentityNode()
|
val unified_mem_read_node = TLIdentityNode()
|
||||||
val unified_mem_write_node = TLIdentityNode()
|
val unified_mem_write_node = TLIdentityNode()
|
||||||
|
|
||||||
val spad_data_len = gemminiConfig.sp_width / 8
|
val spad_data_len = gemminiConfig.sp_width / 8
|
||||||
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
|
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
|
||||||
val max_data_len = spad_data_len // max acc_data_len
|
|
||||||
val smem_base = gemminiConfig.tl_ext_mem_base
|
val smem_base = gemminiConfig.tl_ext_mem_base
|
||||||
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / max_data_len
|
val smem_width = spad_data_len
|
||||||
val smem_width = max_data_len
|
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / smem_width
|
||||||
val smem_banks = gemminiConfig.sp_banks
|
val smem_banks = gemminiConfig.sp_banks
|
||||||
val smem_subbanks = 1
|
val smem_subbanks = smem_width / wordSize
|
||||||
|
val smem_size = smem_width * smem_depth * smem_banks
|
||||||
|
|
||||||
val splitter_node = RWSplitterNode()
|
val stride_by_word = true
|
||||||
|
|
||||||
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
val radiance_smem_fanout = radianceTiles.flatMap {
|
||||||
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
_.smemNodes.map { m =>
|
||||||
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
val smem_fanout_xbar = TLXbar()
|
||||||
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
|
smem_fanout_xbar :=* m
|
||||||
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
|
smem_fanout_xbar
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// assert(splitter_node.in.map(_._2.slave.slaves.flatMap(_.supports.get)))
|
require(isPow2(smem_banks))
|
||||||
|
|
||||||
/* address = Seq(AddressSet(gemmini.spad_base, smem_depth * smem_width * smem_banks - 1)),
|
|
||||||
supports = TLMasterToSlaveTransferSizes(
|
|
||||||
get = TransferSizes(1, smem_width),
|
|
||||||
putFull = TransferSizes(1, smem_width),
|
|
||||||
putPartial = TransferSizes(1, smem_width)),*/
|
|
||||||
|
|
||||||
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
|
|
||||||
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
|
|
||||||
|
|
||||||
val stride_by_word = false
|
|
||||||
// collection of read and write managers for each sram (sub)bank
|
// collection of read and write managers for each sram (sub)bank
|
||||||
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
|
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
|
||||||
assert(false, "TODO under construction")
|
require(isPow2(smem_subbanks))
|
||||||
// assert((config.sp_capacity match { case CapacityInKilobytes(kb) => kb * 1024}) ==
|
(0 until smem_banks).flatMap { bid =>
|
||||||
// gemmini.config.sp_bank_entries * spad_data_len / max_data_len * gemmini.config.sp_banks * max_data_len)
|
(0 until smem_subbanks).map { wid =>
|
||||||
(0 until gemminiConfig.sp_banks).map { bank =>
|
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
LazyModule(new TLRAM(
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
address = AddressSet(max_data_len * bank,
|
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
|
||||||
((gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - 1) * gemminiConfig.sp_banks + bank)
|
address = Seq(AddressSet(
|
||||||
* max_data_len + (max_data_len - 1)),
|
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||||
beatBytes = max_data_len
|
smem_depth * smem_width - smem_width + wordSize - 1
|
||||||
|
)),
|
||||||
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
|
get = TransferSizes(wordSize, wordSize)),
|
||||||
|
fifoId = Some(0)
|
||||||
|
)),
|
||||||
|
beatBytes = wordSize
|
||||||
))
|
))
|
||||||
}.map(x => Seq(x.node))
|
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
|
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
|
||||||
|
address = Seq(AddressSet(
|
||||||
|
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||||
|
smem_depth * smem_width - smem_width + wordSize - 1
|
||||||
|
)),
|
||||||
|
supports = TLMasterToSlaveTransferSizes(
|
||||||
|
putFull = TransferSizes(wordSize, wordSize),
|
||||||
|
putPartial = TransferSizes(wordSize, wordSize)),
|
||||||
|
fifoId = Some(0)
|
||||||
|
)),
|
||||||
|
beatBytes = wordSize
|
||||||
|
))))
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
require(isPow2(smem_banks))
|
|
||||||
(0 until smem_banks).map { bank =>
|
(0 until smem_banks).map { bank =>
|
||||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
@@ -124,8 +135,8 @@ class RadianceCluster (
|
|||||||
fifoId = Some(0)
|
fifoId = Some(0)
|
||||||
)),
|
)),
|
||||||
beatBytes = smem_width
|
beatBytes = smem_width
|
||||||
))),
|
))
|
||||||
TLManagerNode(Seq(TLSlavePortParameters.v1(
|
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||||
managers = Seq(TLSlaveParameters.v2(
|
managers = Seq(TLSlaveParameters.v2(
|
||||||
name = Some(f"sp_bank${bank}_write_mgr"),
|
name = Some(f"sp_bank${bank}_write_mgr"),
|
||||||
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
||||||
@@ -140,23 +151,115 @@ class RadianceCluster (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (stride_by_word) {
|
||||||
|
val spad_read_nodes = Seq.fill(smem_banks) {
|
||||||
|
val r_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
|
r_dist := gemmini.spad_read_nodes
|
||||||
|
Seq.fill(smem_subbanks) {
|
||||||
|
val id_node = TLIdentityNode()
|
||||||
|
id_node := r_dist
|
||||||
|
id_node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val spad_write_nodes = Seq.fill(smem_banks) {
|
||||||
|
val w_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
|
w_dist := gemmini.spad_write_nodes
|
||||||
|
Seq.fill(smem_subbanks) {
|
||||||
|
val id_node = TLIdentityNode()
|
||||||
|
id_node := w_dist
|
||||||
|
id_node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||||
|
ws_dist := gemmini.spad.spad_writer.node // this is the dma write node
|
||||||
|
val spad_sp_write_nodes = Seq.fill(smem_subbanks) {
|
||||||
|
val ws_xbar = TLXbar() // fanout to 4 banks
|
||||||
|
ws_xbar := ws_dist
|
||||||
|
ws_xbar
|
||||||
|
}
|
||||||
|
|
||||||
|
// spad_read_nodes.flatten.foreach(node => unified_mem_read_node :=* node)
|
||||||
|
// spad_write_nodes.flatten.foreach(node => unified_mem_write_node :=* node)
|
||||||
|
// spad_sp_write_nodes.foreach(node => unified_mem_write_node :=* node)
|
||||||
|
// unified_mem_write_node :=* DistributorNode(from = smem_width, to = wordSize) :=* gemmini.spad.spad_writer.node // this is the dma write node
|
||||||
|
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
|
||||||
|
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
|
||||||
|
|
||||||
|
// these nodes access an entire line simultaneously
|
||||||
|
val uniform_r_nodes: Seq[Seq[Seq[TLNode]]] = spad_read_nodes.map { rb =>
|
||||||
|
rb.map { rw => Seq(rw) }
|
||||||
|
}
|
||||||
|
val uniform_w_nodes: Seq[Seq[Seq[TLNode]]] = spad_write_nodes.map { wb =>
|
||||||
|
(wb zip spad_sp_write_nodes).map { case (ww, sw) => Seq(ww, sw) }
|
||||||
|
}
|
||||||
|
|
||||||
|
val splitter_nodes = radiance_smem_fanout.map { m =>
|
||||||
|
val splitter_node = RWSplitterNode()
|
||||||
|
splitter_node := m
|
||||||
|
splitter_node
|
||||||
|
}
|
||||||
|
|
||||||
|
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||||
|
|
||||||
|
// these nodes are random access
|
||||||
|
val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map { s =>
|
||||||
|
val nu_r_xbar = TLXbar()
|
||||||
|
nu_r_xbar := s
|
||||||
|
nu_r_xbar
|
||||||
|
}.toSeq
|
||||||
|
val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map { s =>
|
||||||
|
val nu_w_xbar = TLXbar()
|
||||||
|
nu_w_xbar := s
|
||||||
|
nu_w_xbar
|
||||||
|
}.toSeq
|
||||||
|
|
||||||
|
smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||||
|
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||||
|
// TODO: this should be a coordinated round robin
|
||||||
|
val subbank_r_xbar = TLXbar(TLArbiter.lowestIndexFirst)
|
||||||
|
val subbank_w_xbar = TLXbar(TLArbiter.lowestIndexFirst)
|
||||||
|
r := subbank_r_xbar
|
||||||
|
w := subbank_w_xbar
|
||||||
|
uniform_r_nodes(bid)(wid).foreach( subbank_r_xbar := _ )
|
||||||
|
uniform_w_nodes(bid)(wid).foreach( subbank_w_xbar := _ )
|
||||||
|
|
||||||
|
nonuniform_r_nodes.foreach( subbank_r_xbar := _ )
|
||||||
|
nonuniform_w_nodes.foreach( subbank_w_xbar := _ )
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
||||||
|
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
||||||
|
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||||
|
|
||||||
|
val splitter_node = RWSplitterNode()
|
||||||
|
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||||
|
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
|
||||||
|
|
||||||
|
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||||
|
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||||
|
|
||||||
val smem_r_xbar = TLXbar()
|
val smem_r_xbar = TLXbar()
|
||||||
val smem_w_xbar = TLXbar()
|
val smem_w_xbar = TLXbar()
|
||||||
smem_r_xbar :=* unified_mem_read_node
|
DisableMonitors { implicit p =>
|
||||||
smem_w_xbar :=* unified_mem_write_node
|
smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node
|
||||||
|
smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node
|
||||||
|
}
|
||||||
|
|
||||||
smem_bank_mgrs.foreach { mem =>
|
smem_bank_mgrs.foreach { mem =>
|
||||||
require(mem.length == 2)
|
require(mem.length == 2)
|
||||||
mem.head := smem_r_xbar
|
mem.head := smem_r_xbar
|
||||||
mem.last := TLFragmenter(spad_data_len, max_write_width_bytes) := smem_w_xbar
|
mem.last := smem_w_xbar
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// connect tile smem nodes to xbar, and xbar to banks
|
// connect tile smem nodes to xbar, and xbar to banks
|
||||||
// val smem_xbar = TLXbar()
|
// val smem_xbar = TLXbar()
|
||||||
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
|
||||||
gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode
|
gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||||
// printf and perf counter buffer FIXME: make configurable
|
|
||||||
TLRAM(AddressSet(x"ff004000", numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
|
assert(smem_size == 0x4000, "fix me")
|
||||||
|
// printf and perf counter buffer
|
||||||
|
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
|
||||||
|
|
||||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||||
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
val barrierSlaveNode = BarrierSlaveNode(numCores)
|
||||||
@@ -174,7 +277,6 @@ class RadianceCluster (
|
|||||||
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
// (perSmemPortXbars zip tile.smemNodes).foreach {
|
||||||
// case (xbar, node) => xbar.node := node
|
// case (xbar, node) => xbar.node := node
|
||||||
// }
|
// }
|
||||||
tile.smemNodes.foreach(clbus.inwardNode := _)
|
|
||||||
barrierSlaveNode := tile.barrierMasterNode
|
barrierSlaveNode := tile.barrierMasterNode
|
||||||
}
|
}
|
||||||
// perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
// perSmemPortXbars.foreach { clbus.inwardNode := _.node }
|
||||||
@@ -212,23 +314,10 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove Pipeline dependency of gemmini
|
// TODO: remove Pipeline dependency of gemmini
|
||||||
def makeSmemBanks: Unit = {
|
def makeSmemBanks(): Unit = {
|
||||||
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,
|
||||||
val mem_depth = outer.smem_depth
|
w_node: TLBundle, w_edge: TLEdgeIn): Unit = {
|
||||||
val mem_width = outer.smem_width
|
|
||||||
|
|
||||||
val mem = TwoPortSyncMem(
|
|
||||||
n = mem_depth,
|
|
||||||
t = UInt((mem_width * 8).W),
|
|
||||||
mask_len = mem_width // byte level mask
|
|
||||||
)
|
|
||||||
|
|
||||||
val (r_node, r_edge) = r.in.head
|
|
||||||
val (w_node, w_edge) = w.in.head
|
|
||||||
|
|
||||||
// READ
|
|
||||||
mem.io.ren := r_node.a.fire
|
mem.io.ren := r_node.a.fire
|
||||||
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
|
||||||
|
|
||||||
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
||||||
data_pipe_in.valid := RegNext(mem.io.ren)
|
data_pipe_in.valid := RegNext(mem.io.ren)
|
||||||
@@ -274,7 +363,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
r_node.d.bits := r_edge.AccessAck(
|
r_node.d.bits := r_edge.AccessAck(
|
||||||
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
|
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
|
||||||
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
|
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
|
||||||
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits))
|
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt)
|
||||||
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
|
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
|
||||||
// r node A is not ready only if D is not ready and both slots filled
|
// r node A is not ready only if D is not ready and both slots filled
|
||||||
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
|
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
|
||||||
@@ -283,16 +372,71 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
|
|
||||||
// WRITE
|
// WRITE
|
||||||
mem.io.wen := w_node.a.fire
|
mem.io.wen := w_node.a.fire
|
||||||
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
|
||||||
mem.io.wdata := w_node.a.bits.data
|
mem.io.wdata := w_node.a.bits.data
|
||||||
mem.io.mask := w_node.a.bits.mask.asBools
|
mem.io.mask := w_node.a.bits.mask.asBools
|
||||||
w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
|
w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
|
||||||
w_node.d.valid := w_node.a.valid
|
w_node.d.valid := w_node.a.valid
|
||||||
w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
|
w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (outer.stride_by_word) {
|
||||||
|
outer.smem_bank_mgrs.grouped(outer.smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||||
|
assert(bank_mgrs.flatten.size == 2 * outer.smem_subbanks)
|
||||||
|
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||||
|
assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
|
||||||
|
assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))
|
||||||
|
|
||||||
|
val mem_depth = outer.smem_depth
|
||||||
|
val mem_width = outer.smem_width
|
||||||
|
val word_width = outer.wordSize
|
||||||
|
|
||||||
|
val mem = TwoPortSyncMem(
|
||||||
|
n = mem_depth,
|
||||||
|
t = UInt((word_width * 8).W),
|
||||||
|
mask_len = word_width // byte level mask
|
||||||
|
)
|
||||||
|
mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||||
|
|
||||||
|
val (r_node, r_edge) = r.in.head
|
||||||
|
val (w_node, w_edge) = w.in.head
|
||||||
|
|
||||||
|
// address format is
|
||||||
|
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
|
||||||
|
// line_id is used to index into the SRAMs
|
||||||
|
mem.io.raddr := (r_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
|
||||||
|
mem.io.waddr := (w_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
|
||||||
|
|
||||||
|
assert((bid.U === ((r_node.a.bits.address & (mem_depth * mem_width * outer.smem_banks - 1).U) >>
|
||||||
|
log2Ceil(mem_depth * mem_width).U).asUInt) || !r_node.a.valid, "bank id mismatch with request")
|
||||||
|
assert((wid.U === ((r_node.a.bits.address & (mem_width - 1).U) >>
|
||||||
|
log2Ceil(word_width).U).asUInt) || !r_node.a.valid, "word id mismatch with request")
|
||||||
|
|
||||||
|
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
||||||
|
val mem_depth = outer.smem_depth
|
||||||
|
val mem_width = outer.smem_width
|
||||||
|
|
||||||
|
val mem = TwoPortSyncMem(
|
||||||
|
n = mem_depth,
|
||||||
|
t = UInt((mem_width * 8).W),
|
||||||
|
mask_len = mem_width // byte level mask
|
||||||
|
)
|
||||||
|
|
||||||
|
val (r_node, r_edge) = r.in.head
|
||||||
|
val (w_node, w_edge) = w.in.head
|
||||||
|
|
||||||
|
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||||
|
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||||
|
|
||||||
|
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
makeSmemBanks
|
makeSmemBanks()
|
||||||
|
|
||||||
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
|
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user