Merge commit 'origin/main~1'
This commit is contained in:
Submodule src/main/resources/vsrc/vortex updated: da54162241...cde8da1f3b
@@ -91,13 +91,15 @@ class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyMo
|
||||
}
|
||||
|
||||
def partialData: UInt = VecInit(mn.map(_.d).map(d => Mux(d.fire, d.bits.data, 0.U(d.bits.data.getWidth.W)))).asUInt
|
||||
def partialValid: UInt = VecInit(mn.map(_.d.fire)).asUInt
|
||||
def partialValid: UInt = VecInit(mn.map(_.d.valid)).asUInt
|
||||
def partialFire: UInt = VecInit(mn.map(_.d.fire)).asUInt
|
||||
|
||||
mn.map(_.d.ready).zip(arrived.asBools).foreach { case (r, a) =>
|
||||
r := cn.d.ready && (!partialWait || !a) // if waiting for partial response, ready only if not arrived yet
|
||||
}
|
||||
|
||||
// TODO: might need coverage test for this
|
||||
cd := DontCare
|
||||
when (!partialWait) {
|
||||
cn.d.valid := false.B
|
||||
partialWait := false.B
|
||||
@@ -109,31 +111,36 @@ class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyMo
|
||||
assert(cd.data === partialData, "sanity check")
|
||||
}.elsewhen (partialValid.orR) {
|
||||
// at least 1 valid: enter partial valid state, store partial data into regs
|
||||
partialWait := true.B
|
||||
arrived := partialValid
|
||||
partialWait := cn.d.ready // if something fired, enter partial wait
|
||||
arrived := partialFire
|
||||
cdReg.data := partialData
|
||||
when (mn.head.d.valid) { setMetadata(cdReg, mn.head.d.bits) }
|
||||
when (mn.head.d.fire) { setMetadata(cdReg, mn.head.d.bits) }
|
||||
}
|
||||
}.otherwise {
|
||||
cn.d.valid := false.B
|
||||
partialWait := true.B
|
||||
when ((arrived | partialValid).andR) {
|
||||
// all valids received now
|
||||
when (mn.head.d.valid) {
|
||||
setMetadata(cd, mn.head.d.bits)
|
||||
}.otherwise {
|
||||
cd := cdReg
|
||||
}
|
||||
cn.d.valid := true.B
|
||||
cd.data := cdReg.data | partialData
|
||||
partialWait := false.B
|
||||
cdReg := 0.U.asTypeOf(cdReg.cloneType)
|
||||
arrived := 0.U
|
||||
when (cn.d.ready) {
|
||||
assert((arrived | partialFire).andR)
|
||||
when (mn.head.d.valid) {
|
||||
setMetadata(cd, mn.head.d.bits)
|
||||
}.otherwise {
|
||||
cd := cdReg
|
||||
}
|
||||
cd.data := cdReg.data | partialData
|
||||
partialWait := false.B
|
||||
cdReg := 0.U.asTypeOf(cdReg.cloneType)
|
||||
arrived := 0.U
|
||||
}
|
||||
}.elsewhen (partialValid.orR) {
|
||||
// update partial data
|
||||
arrived := arrived | partialValid
|
||||
cdReg.data := cdReg.data | partialData
|
||||
when (mn.head.d.valid) { setMetadata(cdReg, mn.head.d.bits) }
|
||||
when (cn.d.ready) {
|
||||
arrived := arrived | partialValid
|
||||
cdReg.data := cdReg.data | partialData
|
||||
when (mn.head.d.valid) { setMetadata(cdReg, mn.head.d.bits) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
81
src/main/scala/radiance/memory/SyncMem.scala
Normal file
81
src/main/scala/radiance/memory/SyncMem.scala
Normal file
@@ -0,0 +1,81 @@
|
||||
package radiance.memory
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
import midas.targetutils.SynthesizePrintf
|
||||
|
||||
// modified from gemmini's two port sync mem
|
||||
/** One-read / one-write synchronous-read memory with a per-unit write mask.
  *
  * The payload type `t` is stored as a `Vec` of `maskWidth` elements, each
  * `maskedUnitWidth` bits wide, so that every bit of `io.mask` gates the write
  * of exactly one element. The backing `SyncReadMem` is created with
  * `SyncReadMem.WriteFirst` read-under-write behaviour.
  *
  * When a read and a write are enabled for the same address in the same
  * cycle, a (synthesizable) warning is printed.
  *
  * @param n               number of entries in the memory
  * @param t               payload type of one entry; its width must be a whole
  *                        multiple of `maskedUnitWidth`
  * @param maskedUnitWidth number of payload bits covered by one mask bit
  */
class TwoPortSyncMem[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8) extends Module {
  // Guard the integer division below: a non-multiple width would silently
  // shrink memT and drop the upper payload bits in the asTypeOf conversions.
  require(maskedUnitWidth > 0, s"maskedUnitWidth must be positive, got $maskedUnitWidth")
  require(t.getWidth % maskedUnitWidth == 0,
    s"payload width ${t.getWidth} is not a multiple of maskedUnitWidth $maskedUnitWidth")

  val maskWidth = t.getWidth / maskedUnitWidth
  val io = IO(new Bundle {
    val waddr = Input(UInt((log2Ceil(n) max 1).W))
    val raddr = Input(UInt((log2Ceil(n) max 1).W))
    val wdata = Input(t)
    val rdata = Output(t)
    val wen = Input(Bool())
    val ren = Input(Bool())
    val mask = Input(UInt(maskWidth.W))
  })

  // Same-address read/write in one cycle: report it, the memory itself still operates.
  when (io.wen && io.ren && io.raddr === io.waddr) {
    SynthesizePrintf(printf("WARNING: read and write collided at address 0x%x\n", io.raddr))
  }

  val maskElem = UInt(maskedUnitWidth.W)
  val memT = Vec(maskWidth, maskElem)
  val mem = SyncReadMem(n, memT, SyncReadMem.WriteFirst)

  // Read port: reinterpret the stored Vec as the payload type.
  io.rdata := mem.read(io.raddr, io.ren).asTypeOf(t)

  // Write port: one mask bit per Vec element.
  when (io.wen) {
    mem.write(io.waddr, io.wdata.asTypeOf(memT), io.mask.asBools)
  }
}
|
||||
|
||||
/** Two-read / one-write synchronous-read memory with a per-unit write mask.
  *
  * Implemented as two identical `SyncReadMem` copies (`mem0`, `mem1`): every
  * write goes to both copies, and each copy serves one of the two read ports.
  * Both copies use `SyncReadMem.WriteFirst` read-under-write behaviour.
  *
  * When either read port and the write port are enabled for the same address
  * in the same cycle, a (synthesizable) warning is printed for that port.
  *
  * @param n               number of entries in the memory
  * @param t               payload type of one entry; its width must be a whole
  *                        multiple of `maskedUnitWidth`
  * @param maskedUnitWidth number of payload bits covered by one mask bit
  */
class TwoReadOneWriteSyncMem[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8) extends Module {
  // Guard the integer division below: a non-multiple width would silently
  // shrink memT and drop the upper payload bits in the asTypeOf conversions.
  require(maskedUnitWidth > 0, s"maskedUnitWidth must be positive, got $maskedUnitWidth")
  require(t.getWidth % maskedUnitWidth == 0,
    s"payload width ${t.getWidth} is not a multiple of maskedUnitWidth $maskedUnitWidth")

  val maskWidth = t.getWidth / maskedUnitWidth
  val io = IO(new Bundle {
    val waddr = Input(UInt((log2Ceil(n) max 1).W))
    val raddr0 = Input(UInt((log2Ceil(n) max 1).W))
    val raddr1 = Input(UInt((log2Ceil(n) max 1).W))
    val wdata = Input(t)
    val rdata0 = Output(t)
    val rdata1 = Output(t)
    val wen = Input(Bool())
    val ren0 = Input(Bool())
    val ren1 = Input(Bool())
    val mask = Input(UInt(maskWidth.W))
  })

  // Same-address read/write in one cycle: report it per read port.
  when (io.wen && io.ren0 && io.raddr0 === io.waddr) {
    SynthesizePrintf(printf("WARNING: read0 and write collided at address 0x%x\n", io.raddr0))
  }
  when (io.wen && io.ren1 && io.raddr1 === io.waddr) {
    SynthesizePrintf(printf("WARNING: read1 and write collided at address 0x%x\n", io.raddr1))
  }

  val maskElem = UInt(maskedUnitWidth.W)
  val memT = Vec(maskWidth, maskElem)
  // One copy of the storage per read port.
  val mem0 = SyncReadMem(n, memT, SyncReadMem.WriteFirst)
  val mem1 = SyncReadMem(n, memT, SyncReadMem.WriteFirst)

  io.rdata0 := mem0.read(io.raddr0, io.ren0).asTypeOf(t)
  io.rdata1 := mem1.read(io.raddr1, io.ren1).asTypeOf(t)

  // Keep both copies identical: every write is applied to each of them.
  when (io.wen) {
    mem0.write(io.waddr, io.wdata.asTypeOf(memT), io.mask.asBools)
    mem1.write(io.waddr, io.wdata.asTypeOf(memT), io.mask.asBools)
  }
}
|
||||
|
||||
|
||||
/** Factory for [[TwoPortSyncMem]]: builds the memory and wraps it in `Module(...)`
  * so call sites can write `TwoPortSyncMem(n, t)` directly.
  */
object TwoPortSyncMem {
  def apply[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8): TwoPortSyncMem[T] =
    Module(new TwoPortSyncMem[T](n = n, t = t, maskedUnitWidth = maskedUnitWidth))
}
|
||||
|
||||
/** Factory for [[TwoReadOneWriteSyncMem]]: builds the memory and wraps it in
  * `Module(...)` so call sites can write `TwoReadOneWriteSyncMem(n, t)` directly.
  */
object TwoReadOneWriteSyncMem {
  def apply[T <: Data](n: Int, t: T, maskedUnitWidth: Int = 8): TwoReadOneWriteSyncMem[T] =
    Module(new TwoReadOneWriteSyncMem[T](n = n, t = t, maskedUnitWidth = maskedUnitWidth))
}
|
||||
@@ -20,17 +20,22 @@ object ExtPolicyNodeImp extends SimpleNodeImp[Int, Int, Int, ExtPolicyBundle] {
|
||||
case class ExtPolicyMasterNode(w: Int)(implicit valName: ValName) extends SourceNode(ExtPolicyNodeImp)(Seq(w))
|
||||
case class ExtPolicySlaveNode()(implicit valName: ValName) extends SinkNode(ExtPolicyNodeImp)(Seq(0))
|
||||
|
||||
class XbarWithExtPolicy(nameSuffix: Option[String] = None)
|
||||
class XbarWithExtPolicy(nameSuffix: Option[String] = None, useFallback: Boolean = true)
|
||||
(implicit p: Parameters) extends TLXbar(nameSuffix = nameSuffix) {
|
||||
val policySlaveNode = ExtPolicySlaveNode()
|
||||
|
||||
class ImplChild extends Impl {
|
||||
val policy: TLArbiter.Policy = (width, valids, select) => {
|
||||
val in = policySlaveNode.in.head._1
|
||||
val hintHit = (valids & in.hint).orR
|
||||
val fallback = TLArbiter.lowestIndexFirst(width, valids, !hintHit && select)
|
||||
in.actual := select.asTypeOf(in.actual.cloneType)
|
||||
Mux(hintHit, in.hint, fallback)
|
||||
|
||||
if (useFallback) {
|
||||
val hintHit = (valids & in.hint).orR
|
||||
val fallback = TLArbiter.lowestIndexFirst(width, valids, !hintHit && select)
|
||||
Mux(hintHit, in.hint, fallback)
|
||||
} else {
|
||||
in.hint
|
||||
}
|
||||
}
|
||||
TLXbar.circuit(policy, node.in, node.out)
|
||||
}
|
||||
@@ -44,4 +49,14 @@ object XbarWithExtPolicy {
|
||||
val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix))
|
||||
xbar
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Builds an [[XbarWithExtPolicy]] with `useFallback` disabled, together with
  * an identity node that is already wired into the crossbar's input side.
  */
object XbarWithExtPolicyNoFallback {
  /** @param nameSuffix optional suffix forwarded to the crossbar
    * @return the crossbar and the identity node feeding `xbar.node`
    */
  def apply(nameSuffix: Option[String] = None)(
      implicit p: Parameters): (XbarWithExtPolicy, TLIdentityNode) = {
    // NOTE: the val names below are kept as-is; diplomacy picks instance names up from them.
    val inIdNode = TLIdentityNode()
    val xbar = LazyModule(new XbarWithExtPolicy(nameSuffix, useFallback = false))
    xbar.node :=* inIdNode
    (xbar, inIdNode)
  }
}
|
||||
|
||||
@@ -15,6 +15,12 @@ import radiance.tile._
|
||||
import radiance.memory._
|
||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||
|
||||
/** Selects how unaligned shared-memory accesses are serialized.
  *
  * Carried by `RadianceSharedMemKey.serializeUnaligned`; the descriptions on
  * the cases reflect how `VirgoSharedMemComponents` interprets them.
  */
sealed trait RadianceSmemSerialization

/** All unaligned requests are funneled through a single crossbar per direction. */
case object FullySerialized extends RadianceSmemSerialization

/** Unaligned requests pass through one externally-arbitrated crossbar per lane,
  * with all lanes selecting the same core.
  */
case object CoreSerialized extends RadianceSmemSerialization

/** No serialization: each unaligned request node gets its own crossbar. */
case object NotSerialized extends RadianceSmemSerialization
|
||||
|
||||
case class RadianceSharedMemKey(address: BigInt,
|
||||
size: Int,
|
||||
numBanks: Int,
|
||||
@@ -23,7 +29,7 @@ case class RadianceSharedMemKey(address: BigInt,
|
||||
strideByWord: Boolean = true,
|
||||
filterAligned: Boolean = true,
|
||||
disableMonitors: Boolean = true,
|
||||
serializeUnaligned: Boolean = true)
|
||||
serializeUnaligned: RadianceSmemSerialization = FullySerialized)
|
||||
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
|
||||
|
||||
case class RadianceFrameBufferKey(baseAddress: BigInt,
|
||||
@@ -56,7 +62,7 @@ class WithRadianceCores(
|
||||
nTLBWays = 1,
|
||||
nTLBBasePageSectors = 1,
|
||||
nTLBSuperpages = 1,
|
||||
nMSHRs = 0,
|
||||
nMSHRs = 0,
|
||||
blockBytes = site(CacheBlockBytes))),
|
||||
icache = Some(ICacheParams(
|
||||
rowBits = site(SystemBusKey).beatBits,
|
||||
@@ -194,8 +200,8 @@ class WithRadianceSharedMem(address: BigInt,
|
||||
strideByWord: Boolean = true,
|
||||
filterAligned: Boolean = true,
|
||||
disableMonitors: Boolean = true,
|
||||
serializeUnaligned: Boolean = true
|
||||
) extends Config((site, _, _) => {
|
||||
serializeUnaligned: RadianceSmemSerialization = FullySerialized
|
||||
) extends Config((_, _, _) => {
|
||||
case RadianceSharedMemKey => {
|
||||
require(isPow2(size) && size >= 1024)
|
||||
Some(RadianceSharedMemKey(
|
||||
|
||||
@@ -38,7 +38,9 @@ class RadianceCluster (
|
||||
|
||||
// TODO: this probably needs to be instantiated inside the radiance shared mem module
|
||||
def virgoSharedMemComponentsGen() = new VirgoSharedMemComponents(thisClusterParams, gemminiTiles, radianceTiles)
|
||||
LazyModule(new RadianceSharedMem(virgoSharedMemComponentsGen, clbus)).suggestName("shared_mem")
|
||||
def virgoSharedMemComponentsImpGen(outer: VirgoSharedMemComponents) = new VirgoSharedMemComponentsImp(outer)
|
||||
LazyModule(new RadianceSharedMem(
|
||||
virgoSharedMemComponentsGen, Some(virgoSharedMemComponentsImpGen(_)), clbus)).suggestName("shared_mem")
|
||||
|
||||
// direct core-accelerator connections
|
||||
val smemKey = p(RadianceSharedMemKey).get
|
||||
|
||||
@@ -4,14 +4,14 @@ import chisel3._
|
||||
import chisel3.util._
|
||||
import org.chipsalliance.diplomacy.lazymodule._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
import radiance.memory._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.diplomacy.{AddressSet, TransferSizes}
|
||||
import gemmini.Pipeline
|
||||
import radiance.subsystem.RadianceSharedMemKey
|
||||
import gemmini._
|
||||
import radiance.memory._
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
trait RadianceSmemNodeProvider {
|
||||
abstract class RadianceSmemNodeProvider {
|
||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]]
|
||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]]
|
||||
val nonuniformRNodes: Seq[TLNode]
|
||||
@@ -19,8 +19,11 @@ trait RadianceSmemNodeProvider {
|
||||
val clBusClients: Seq[TLNode]
|
||||
}
|
||||
|
||||
class RadianceSharedMem(
|
||||
provider: () => RadianceSmemNodeProvider,
|
||||
abstract class RadianceSmemNodeProviderImp[T <: RadianceSmemNodeProvider](val outer: T) {}
|
||||
|
||||
class RadianceSharedMem[T <: RadianceSmemNodeProvider](
|
||||
provider: () => T,
|
||||
val providerImp: Option[(T) => RadianceSmemNodeProviderImp[T]],
|
||||
clbus: TLBusWrapper
|
||||
)(implicit p: Parameters) extends LazyModule {
|
||||
val smemKey = p(RadianceSharedMemKey).get
|
||||
@@ -31,6 +34,7 @@ class RadianceSharedMem(
|
||||
val smemDepth = smemKey.size / smemWidth / smemBanks
|
||||
val smemSubbanks = smemWidth / wordSize
|
||||
val smemSize = smemWidth * smemDepth * smemBanks
|
||||
val strideByWord = smemKey.strideByWord
|
||||
|
||||
require(isPow2(smemBanks))
|
||||
|
||||
@@ -38,11 +42,7 @@ class RadianceSharedMem(
|
||||
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
||||
(smNodes.uniformRNodes, smNodes.uniformWNodes, smNodes.nonuniformRNodes, smNodes.nonuniformWNodes)
|
||||
|
||||
// TODO: move this to config
|
||||
val strideByWord = true
|
||||
val filterAligned = true
|
||||
val serializeUnaligned = true
|
||||
implicit val disableMonitors = true // otherwise it generate 1k+ different tl monitors
|
||||
implicit val disableMonitors = smemKey.disableMonitors // otherwise it generate 1k+ different tl monitors
|
||||
|
||||
// collection of read and write managers for each sram (sub)bank
|
||||
val smemBankMgrs : Seq[Seq[TLManagerNode]] = if (strideByWord) {
|
||||
@@ -180,9 +180,11 @@ class RadianceSharedMem(
|
||||
lazy val module = new RadianceSharedMemImp(this)
|
||||
}
|
||||
|
||||
class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer) {
|
||||
class RadianceSharedMemImp[T <: RadianceSmemNodeProvider](outer: RadianceSharedMem[T]) extends LazyModuleImp(outer) {
|
||||
|
||||
def makeBuffer[T <: Data](mem: TwoPortSyncMem[T], rNode: TLBundle, rEdge: TLEdgeIn,
|
||||
val smNodesImp = outer.providerImp.map(impFn => impFn(outer.smNodes))
|
||||
|
||||
def makeBuffer[U <: Data](mem: TwoPortSyncMem[U], rNode: TLBundle, rEdge: TLEdgeIn,
|
||||
wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
||||
mem.io.ren := rNode.a.fire
|
||||
|
||||
@@ -240,7 +242,7 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
||||
// WRITE
|
||||
mem.io.wen := RegNext(wNode.a.fire)
|
||||
mem.io.wdata := RegNext(wNode.a.bits.data)
|
||||
mem.io.mask := RegNext(VecInit(wNode.a.bits.mask.asBools))
|
||||
mem.io.mask := RegNext(wNode.a.bits.mask)
|
||||
|
||||
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
||||
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
||||
@@ -286,7 +288,6 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
||||
val mem = TwoPortSyncMem(
|
||||
n = memDepth,
|
||||
t = UInt((wordWidth * 8).W),
|
||||
mask_len = wordWidth // byte level mask
|
||||
)
|
||||
// TODO: bring in cluster id
|
||||
// mem.suggestName(s"rad_smem_cl${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||
@@ -346,7 +347,6 @@ class RadianceSharedMemImp(outer: RadianceSharedMem) extends LazyModuleImp(outer
|
||||
val mem = TwoPortSyncMem(
|
||||
n = memDepth,
|
||||
t = UInt((memWidth * 8).W),
|
||||
mask_len = memWidth // byte level mask
|
||||
)
|
||||
|
||||
val (rNode, rEdge) = r.in.head
|
||||
|
||||
@@ -11,6 +11,7 @@ import freechips.rocketchip.diplomacy._
|
||||
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||
import freechips.rocketchip.prci.{ClockCrossingType, ClockSinkParameters, RationalCrossing}
|
||||
import freechips.rocketchip.regmapper.RegField
|
||||
import freechips.rocketchip.resources.BigIntHexContext
|
||||
import freechips.rocketchip.rocket._
|
||||
import freechips.rocketchip.subsystem.HierarchicalElementCrossingParamsLike
|
||||
import freechips.rocketchip.tile._
|
||||
@@ -274,6 +275,20 @@ class RadianceTile private (
|
||||
)
|
||||
}
|
||||
|
||||
val tcSmemSize = 32
|
||||
val tcSmemNodes = Seq.tabulate(2) { i =>
|
||||
TLClientNode(Seq(TLMasterPortParameters.v2(
|
||||
masters = Seq(TLMasterParameters.v2(
|
||||
name = s"rad_tc_${radianceParams.coreId}_$i",
|
||||
sourceId = IdRange(0, 1 << smemSourceWidth),
|
||||
supports = TLSlaveToMasterTransferSizes(
|
||||
probe = TransferSizes(1, tcSmemSize),
|
||||
get = TransferSizes(1, tcSmemSize),
|
||||
)
|
||||
))
|
||||
)))
|
||||
}
|
||||
|
||||
// combine outgoing per-lane dmemNode into 1 idenity node
|
||||
//
|
||||
// NOTE: We need TLWidthWidget here because there might be a data width
|
||||
@@ -673,7 +688,7 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
outer.smemSourceWidth,
|
||||
new VortexBundleA(tagWidth = outer.smemTagWidth, dataWidth = 32),
|
||||
new VortexBundleD(tagWidth = outer.smemTagWidth, dataWidth = 32),
|
||||
outer.smemNodes(0).out.head
|
||||
outer.smemNodes.head.out.head
|
||||
)
|
||||
)
|
||||
}
|
||||
@@ -718,6 +733,49 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
}
|
||||
}
|
||||
|
||||
/** Wires the core's two `tc_*` request/response ports to `outer.tcSmemNodes`.
  *
  * The core exposes both ports as flattened bit vectors (port `i` occupies
  * slice `i` of every `tc_*` signal). Each port gets its own `VortexTLAdapter`
  * that issues 32-byte (`size = 5`) TileLink `Get` requests on the matching
  * client node; responses are concatenated back with port 0 in the low bits.
  */
def connectTc: Unit = {
  // Per-port widths of the flattened core.io.tc_a_bits_* signals.
  val tcAddrBits = 32
  val tcTagBits = 4
  val numTcPorts = 2
  require(outer.tcSmemNodes.length == numTcPorts,
    s"expected $numTcPorts tc smem nodes, got ${outer.tcSmemNodes.length}")

  // A plain local class instead of anonymous structural types, so member
  // access is statically resolved rather than going through reflection.
  case class TcPort(addr: UInt, tag: UInt, aValid: Bool, dReady: Bool)

  val adapters = outer.tcSmemNodes.zipWithIndex.map { case (node, i) =>
    val bundle = TcPort(
      addr = core.io.tc_a_bits_address((i + 1) * tcAddrBits - 1, i * tcAddrBits),
      tag = core.io.tc_a_bits_tag((i + 1) * tcTagBits - 1, i * tcTagBits),
      aValid = core.io.tc_a_valid(i),
      dReady = core.io.tc_d_ready(i)
    )
    val client = node.out.head
    val adapter = Module(
      new VortexTLAdapter(
        outer.smemSourceWidth,
        new VortexBundleA(tagWidth = 1, dataWidth = 32 * 8),
        new VortexBundleD(tagWidth = 1, dataWidth = 32 * 8),
        client
      )
    )
    // Default every request field first, then drive the ones a Get needs.
    adapter.io.inReq.bits <> DontCare
    adapter.io.inReq.valid := bundle.aValid
    adapter.io.inReq.bits.address := bundle.addr
    adapter.io.inReq.bits.source := bundle.tag
    adapter.io.inReq.bits.size := 5.U
    adapter.io.inReq.bits.opcode := TLMessages.Get
    adapter.io.inReq.bits.mask := x"ffffffff".U
    adapter.io.inResp.ready := bundle.dReady

    client._1.a <> adapter.io.outReq
    adapter.io.outResp <> client._1.d
    adapter
  }

  // Cat puts its first argument in the MSBs, so reverse to keep port 0 in the low bits.
  core.io.tc_a_ready := Cat(adapters.map(_.io.inReq.ready).reverse)
  core.io.tc_d_valid := Cat(adapters.map(_.io.inResp.valid).reverse)
  core.io.tc_d_bits_data := Cat(adapters.map(_.io.inResp.bits.data).reverse)
  core.io.tc_d_bits_tag := Cat(adapters.map(_.io.inResp.bits.source).reverse)
}
|
||||
|
||||
def connectBarrier = {
|
||||
require(outer.barrierMasterNode.out.length == 1)
|
||||
// FIXME: bits not flattened
|
||||
@@ -773,6 +831,7 @@ class RadianceTileModuleImp(outer: RadianceTile)
|
||||
connectImem
|
||||
connectDmem
|
||||
connectSmem
|
||||
connectTc
|
||||
connectBarrier
|
||||
connectAccelerator
|
||||
}
|
||||
|
||||
@@ -9,8 +9,9 @@ import radiance.memory._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.diplomacy.{AddressSet, BufferParams}
|
||||
import freechips.rocketchip.subsystem.BaseClusterParams
|
||||
import radiance.subsystem.RadianceSharedMemKey
|
||||
import radiance.subsystem.{CoreSerialized, FullySerialized, NotSerialized, RadianceSharedMemKey}
|
||||
import gemmini._
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
// virgo-specific tilelink nodes
|
||||
// generic smem implementation is in RadianceSharedMem.scala
|
||||
@@ -28,6 +29,9 @@ class VirgoSharedMemComponents(
|
||||
val smemSubbanks = smemWidth / wordSize
|
||||
val smemSize = smemWidth * smemDepth * smemBanks
|
||||
|
||||
val numCores = radianceTiles.length
|
||||
val numLanes = radianceTiles.head.numLsuLanes
|
||||
|
||||
val gemminis = gemminiTiles.map(_.gemmini)
|
||||
val gemminiConfigs = gemminis.map(_.config)
|
||||
gemminiConfigs.foreach { config =>
|
||||
@@ -54,9 +58,26 @@ class VirgoSharedMemComponents(
|
||||
smemFanoutXbar.node
|
||||
}
|
||||
}
|
||||
val tcNodeFanouts = radianceTiles.flatMap(_.tcSmemNodes)
|
||||
// .map(connectOne(_, () => TLBuffer(BufferParams(2, false, false), BufferParams(0))))
|
||||
.map(connectXbarName(_, Some("tc_fanout")))
|
||||
val clBusClients: Seq[TLNode] = radianceSmemFanout
|
||||
|
||||
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
||||
// convert to monad (very fancy)
|
||||
val coreSerialOpt: Option[Unit] = serializeUnaligned match {
|
||||
case CoreSerialized => Some(())
|
||||
case _ => None
|
||||
}
|
||||
|
||||
// uniform mux select for selecting lanes from a single core in unison
|
||||
val coreSerialPolicy = coreSerialOpt.map(_ => Seq.fill(2)(Seq.fill(numLanes)(ExtPolicyMasterNode(numCores))))
|
||||
val laneSerialXbars = coreSerialOpt.map(_ => Seq.tabulate(2) { rw =>
|
||||
Seq.tabulate(numLanes) { lid =>
|
||||
XbarWithExtPolicyNoFallback(Some(f"lane_${lid}_serial_in_xbar_$rw"))
|
||||
}
|
||||
})
|
||||
|
||||
override val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes) =
|
||||
|
||||
if (strideByWord) {
|
||||
def distAndDuplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
|
||||
@@ -68,7 +89,7 @@ class VirgoSharedMemComponents(
|
||||
dist := node
|
||||
}
|
||||
val fanout = Seq.tabulate(spSubbanks) { w =>
|
||||
val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0))
|
||||
val buf = TLBuffer(BufferParams(2, false, false), BufferParams(0))
|
||||
buf := dist
|
||||
connectXbarName(buf, Some(s"spad_g${gemminiIdx}w${w}_fanout_$suffix"))
|
||||
}
|
||||
@@ -84,57 +105,41 @@ class VirgoSharedMemComponents(
|
||||
val spadSpWriteNodesSingleBank = distAndDuplicate(gemminis.map(_.spad.spad_writer.node), "ws")
|
||||
val spadSpWriteNodes = Seq.fill(smemBanks)(spadSpWriteNodesSingleBank) // executed only once
|
||||
|
||||
// tensor core read nodes
|
||||
val tcDistNodes = Seq.fill(smemBanks)(tcNodeFanouts.map(connectOne(_, () => DistributorNode(smemWidth, wordSize))))
|
||||
val tcNodes = tcDistNodes.map { tcBank =>
|
||||
Seq.fill(smemSubbanks)(tcBank.map(connectOne(_, () => TLBuffer(BufferParams(2, false, false)))).map(connectXbarName(_, Some("tc_dist_fanout"))))
|
||||
} // (banks, subbanks, tc client)
|
||||
|
||||
val unalignedRWNodes: ArrayBuffer[ArrayBuffer[TLNexusNode]] = // mutable for readability
|
||||
ArrayBuffer.fill(numLanes)(ArrayBuffer.fill(numCores)(null))
|
||||
|
||||
if (filterAligned) {
|
||||
val numLsuLanes = radianceTiles.head.numLsuLanes
|
||||
val numLaneDupes = Math.max(1, smemSubbanks / numLsuLanes)
|
||||
val filterRange = Math.min(smemSubbanks, numLsuLanes)
|
||||
println(s"num_lsu_lanes ${numLsuLanes} num_lane_dupes ${numLaneDupes} filter_range ${filterRange}")
|
||||
val numLaneDupes = Math.max(1, smemSubbanks / numLanes)
|
||||
val filterRange = Math.min(smemSubbanks, numLanes)
|
||||
|
||||
// (subbank, sources, aligned) = rw node
|
||||
val (fAligned, fUnaligned) = if (numLsuLanes >= smemSubbanks) {
|
||||
val filterNodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(numLaneDupes) { did =>
|
||||
Seq.tabulate(filterRange) { wid =>
|
||||
val trueWid = did * filterRange + wid
|
||||
val address = AddressSet(smemBase + wordSize * trueWid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
// (subbank, sources) = rw node
|
||||
val fAligned = if (numLanes >= smemSubbanks) {
|
||||
val filterNodes: Seq[Seq[TLNode]] = Seq.tabulate(filterRange) { wid =>
|
||||
val address = AddressSet(smemBase + wordSize * wid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
|
||||
radianceSmemFanout.grouped(numLsuLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
||||
lanes.zipWithIndex.flatMap { case (lane, lid) =>
|
||||
if ((lid % filterRange) == wid) {
|
||||
println(f"c${cid}_l${lid} connected to d${did}w${wid}")
|
||||
val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${trueWid}"))
|
||||
DisableMonitors { implicit p => filterNode := lane }
|
||||
// Seq((aligned splitter, unaligned splitter))
|
||||
Seq((
|
||||
connectOne(filterNode, () =>
|
||||
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${trueWid}")),
|
||||
connectOne(filterNode, () =>
|
||||
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
|
||||
))
|
||||
} else Seq()
|
||||
}
|
||||
radianceSmemFanout.grouped(numLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
||||
lanes.zipWithIndex.flatMap { case (lane, lid) =>
|
||||
if ((lid % filterRange) == wid) {
|
||||
val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${wid}"))
|
||||
DisableMonitors { implicit p => filterNode := lane }
|
||||
|
||||
unalignedRWNodes(lid)(cid) = connectOne(filterNode, () =>
|
||||
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
|
||||
|
||||
Seq(connectOne(filterNode, () =>
|
||||
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${wid}")))
|
||||
} else Seq()
|
||||
}
|
||||
}
|
||||
}.flatten
|
||||
|
||||
val fAligned = Seq.fill(2)(filterNodes.map(_.map(_._1).map(connectXbarName(_, Some("rad_aligned")))))
|
||||
val fUnaligned = if (serializeUnaligned) {
|
||||
Seq.fill(2) {
|
||||
val serializedNode = TLEphemeralNode()
|
||||
val serializedInXbar = LazyModule(new TLXbar())
|
||||
val serializedOutXbar = LazyModule(new TLXbar())
|
||||
serializedInXbar.suggestName("unaligned_serialized_in_xbar")
|
||||
serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
|
||||
guardMonitors { implicit p =>
|
||||
filterNodes.foreach(_.map(_._2).foreach(serializedInXbar.node := _))
|
||||
serializedNode := serializedInXbar.node
|
||||
serializedOutXbar.node := serializedNode
|
||||
}
|
||||
Seq(serializedOutXbar.node)
|
||||
}
|
||||
} else {
|
||||
Seq.fill(2)(filterNodes.flatMap(_.map(_._2).map(connectXbar.apply)))
|
||||
}
|
||||
(fAligned, fUnaligned)
|
||||
|
||||
Seq.fill(2)(filterNodes.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||
} else { // aligned: (subbanks, cores) = rw node
|
||||
// (lanes, cores) = filter_node
|
||||
val filterNodes = Seq.tabulate(filterRange) { wid =>
|
||||
@@ -142,7 +147,7 @@ class VirgoSharedMemComponents(
|
||||
AddressSet(smemBase + (did * filterRange + wid) * wordSize,
|
||||
(smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
}
|
||||
radianceSmemFanout.grouped(numLsuLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
||||
radianceSmemFanout.grouped(numLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
||||
val lane = lanes(wid)
|
||||
val filterNode = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"))
|
||||
guardMonitors { implicit p =>
|
||||
@@ -160,34 +165,45 @@ class VirgoSharedMemComponents(
|
||||
}
|
||||
}
|
||||
}.flatten
|
||||
val fUnalignedRW = filterNodes.zipWithIndex.flatMap { case (cores, lid) =>
|
||||
cores.zipWithIndex.map { case (fn, cid) =>
|
||||
connectOne(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
||||
filterNodes.zipWithIndex.foreach { case (cores, lid) =>
|
||||
cores.zipWithIndex.foreach { case (fn, cid) =>
|
||||
unalignedRWNodes(lid)(cid) = connectOne(fn, () =>
|
||||
RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
||||
}
|
||||
}
|
||||
val fAligned = Seq.fill(2)(fAlignedRW.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||
Seq.fill(2)(fAlignedRW.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||
}
|
||||
|
||||
val fUnaligned = if (serializeUnaligned) {
|
||||
Seq.fill(2) {
|
||||
val serializedNode = TLEphemeralNode()
|
||||
val serializedInXbar = TLXbar(nameSuffix = Some("unaligned_ser_in"))
|
||||
val serializedOutXbar = TLXbar(nameSuffix = Some("unaligned_ser_out"))
|
||||
guardMonitors { implicit p =>
|
||||
fUnalignedRW.foreach(serializedInXbar := _)
|
||||
serializedNode := serializedInXbar
|
||||
serializedOutXbar := serializedNode
|
||||
}
|
||||
Seq(serializedOutXbar)
|
||||
val fUnaligned: Seq[Seq[TLNode]] = serializeUnaligned match {
|
||||
case FullySerialized => Seq.fill(2) {
|
||||
val serializedNode = TLEphemeralNode()
|
||||
val serializedInXbar = LazyModule(new TLXbar())
|
||||
val serializedOutXbar = LazyModule(new TLXbar())
|
||||
serializedInXbar.suggestName("unaligned_serialized_in_xbar")
|
||||
serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
|
||||
guardMonitors { implicit p =>
|
||||
unalignedRWNodes.flatten.foreach(serializedInXbar.node := _)
|
||||
serializedNode := serializedInXbar.node
|
||||
serializedOutXbar.node := serializedNode
|
||||
}
|
||||
} else {
|
||||
Seq.fill(2)(fUnalignedRW.map(connectXbar.apply))
|
||||
Seq(serializedOutXbar.node)
|
||||
}
|
||||
(fAligned, fUnaligned)
|
||||
case CoreSerialized => Seq.tabulate(2) { rw =>
|
||||
// we can either have one core per lane selected (multiple mux selects)
|
||||
// or strictly lanes from a single selected core (one mux select). doing the latter here
|
||||
unalignedRWNodes.toSeq.zipWithIndex.map { case (coresRW, lid) =>
|
||||
val laneSerialXbar = laneSerialXbars.get(rw)(lid)
|
||||
laneSerialXbar._1.policySlaveNode := coreSerialPolicy.get(rw)(lid)
|
||||
coresRW.foreach(laneSerialXbar._2 := _)
|
||||
connectXbarName(connectOne(laneSerialXbar._1.node, TLEphemeralNode.apply), Some(s"lane_${lid}_serial_out"))
|
||||
}
|
||||
}
|
||||
case NotSerialized => Seq.fill(2)(unalignedRWNodes.toSeq.flatten.map(connectXbar.apply))
|
||||
}
|
||||
|
||||
|
||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes.map { rb =>
|
||||
(rb zip fAligned.head).map { case (rw, fa) => rw ++ fa }
|
||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadReadNodes zip tcNodes).map { case (rb, tcrb) =>
|
||||
(rb lazyZip tcrb lazyZip fAligned.head).map { case (rw, tcrw, fa) => rw ++ tcrw ++ fa }
|
||||
}
|
||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
||||
(wb lazyZip wsb lazyZip fAligned.last).map {
|
||||
@@ -206,6 +222,8 @@ class VirgoSharedMemComponents(
|
||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
||||
(wb zip wsb).map { case (ww, wsw) => ww ++ wsw }
|
||||
}
|
||||
// random accesses are not serialized here, require so
|
||||
require(serializeUnaligned == NotSerialized, "when not filtering, unaligned accesses must be serialized")
|
||||
// these nodes are random access
|
||||
val nonuniformRNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_r")))
|
||||
val nonuniformWNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_w")))
|
||||
@@ -233,3 +251,23 @@ class VirgoSharedMemComponents(
|
||||
(Seq.empty, Seq.empty, Seq(unifiedMemReadNode), Seq(unifiedMemWriteNode))
|
||||
}
|
||||
}
|
||||
|
||||
/** Hardware-side logic that accompanies a [[VirgoSharedMemComponents]] instance.
  *
  * For every group of per-lane serializing crossbars, this picks one core with a
  * round-robin arbiter, gates each lane's request valid with that choice, and
  * forwards the same choice as the hint to the lane's external-policy node.
  * Nothing is generated when `outer.laneSerialXbars` / `outer.coreSerialPolicy`
  * are empty Options.
  */
class VirgoSharedMemComponentsImp[T <: VirgoSharedMemComponents](override val outer: T)
    extends RadianceSmemNodeProviderImp[T](outer) {

  outer.laneSerialXbars.zip(outer.coreSerialPolicy).foreach { case (xbarsRW, policiesRW) =>
    xbarsRW.zip(policiesRW).foreach { case (xbars, policies) =>
      // Regroup the per-lane bundles by core: a core counts as valid when any of
      // its lanes presents a request on the identity node's input side ...
      val coreValids = xbars
        .map { case (_, idNode) => idNode.in.map { case (bundle, _) => bundle } }
        .transpose
        .map(lanes => VecInit(lanes.map(_.a.valid)).asUInt.orR)
      // ... and as ready when any of its lanes is ready on the output side.
      val select = xbars
        .map { case (_, idNode) => idNode.out.map { case (bundle, _) => bundle } }
        .transpose
        .map(lanes => VecInit(lanes.map(_.a.ready)).asUInt.orR)
      val coreSelect =
        TLArbiter.roundRobin(outer.numCores, VecInit(coreValids).asUInt, VecInit(select).asUInt.orR)

      // TODO: roll this into XbarWithExtPolicy
      // Only the selected core's request may appear valid downstream, on every lane.
      xbars.foreach { case (_, idNode) =>
        val ins = idNode.in.map(_._1)
        val outs = idNode.out.map(_._1)
        (ins lazyZip outs lazyZip coreSelect.asBools).foreach { case (li, lo, cs) =>
          lo.a.valid := li.a.valid && cs
        }
      }

      // Every lane's crossbar is given the same core selection as its arbitration hint.
      policies.foreach { policy =>
        policy.out.head._1.hint := coreSelect
      }
    }
  }
}
|
||||
|
||||
@@ -90,6 +90,15 @@ class VortexBundle(tile: RadianceTile)(implicit p: Parameters) extends CoreBundl
|
||||
val smem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
|
||||
val tc_a_valid = Output(UInt(2.W))
|
||||
val tc_a_bits_address = Output(UInt((2 * 32).W))
|
||||
val tc_a_bits_tag = Output(UInt((2 * 4).W))
|
||||
val tc_a_ready = Input(UInt(2.W))
|
||||
val tc_d_valid = Input(UInt(2.W))
|
||||
val tc_d_bits_data = Input(UInt((2 * 32 * 8).W))
|
||||
val tc_d_bits_tag = Input(UInt((2 * 4).W))
|
||||
val tc_d_ready = Output(UInt(2.W))
|
||||
|
||||
// FIXME: hardcoded
|
||||
val barrierIdBits = tile.barrierMasterNode.out(0)._2.barrierIdBits
|
||||
val coreIdBits = tile.barrierMasterNode.out(0)._2.numCoreBits
|
||||
@@ -233,6 +242,8 @@ class Vortex(tile: RadianceTile)(implicit p: Parameters)
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv")
|
||||
|
||||
addResource("/vsrc/vortex/hw/rtl/mem/VX_tc_bus_if.sv")
|
||||
|
||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_allocator.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/libs/VX_avs_adapter.sv")
|
||||
// addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv")
|
||||
|
||||
Reference in New Issue
Block a user