Parametrize numWarps / numCoreLanes / numMemLanes

This commit is contained in:
Hansung Kim
2024-02-27 18:51:33 -08:00
parent ae6e739655
commit 55d00d25bb
6 changed files with 44 additions and 25 deletions

View File

@@ -17,10 +17,10 @@ trait CanHaveMemtraceCore { this: BaseSubsystem =>
// Safe to use get as WithMemtraceCore requires SIMTCoreKey to be defined (e.g. via WithSimtConfig)
val simtParam = p(SIMTCoreKey).get
val config = DefaultCoalescerConfig.copy(
numLanes = simtParam.nLanes,
numLanes = simtParam.nMemLanes,
numOldSrcIds = simtParam.nSrcIds
)
val numLanes = simtParam.nLanes
val numLanes = simtParam.nMemLanes
val filename = param.tracefilename
// Need to explicitly generate clock domain; see rocket-chip 8881ccd

View File

@@ -12,8 +12,13 @@ import freechips.rocketchip.tilelink._
// TODO: find better place for these
// Note: numNewSrcId is not a part of CoreParam, because the SIMT core should be agnostic to how inflight coalesced requests are generated
case class SIMTCoreParams(nLanes: Int = 4, nSrcIds: Int = 8)
// Sizing parameters of the SIMT core, stored under SIMTCoreKey.
// WithSimtConfig overrides these by calling copy() on the existing value (or
// on SIMTCoreParams() when the key is unset).
// nMemLanes and nSrcIds are what the coalescer-side code reads for its
// numLanes / numOldSrcIds; nCoreLanes / nMemLanes is used by RadianceTile to
// size the LSU queue, so nCoreLanes is presumably expected to be a multiple of
// nMemLanes -- TODO confirm.
case class SIMTCoreParams(
nWarps: Int = 4, // # of warps in the core
nCoreLanes: Int = 4, // # of SIMT threads in the core
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
// cache; relates to the LSU lanes
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
)
case class MemtraceCoreParams(
tracefilename: String = "undefined",
traceHasSource: Boolean = false
@@ -2325,7 +2330,7 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig)
// A dummy harness around the coalescer for use in VLSI flow.
// Should not instantiate any memtrace modules.
class DummyCoalescer(implicit p: Parameters) extends LazyModule {
val numLanes = p(SIMTCoreKey).get.nLanes
val numLanes = p(SIMTCoreKey).get.nMemLanes
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
val driver = LazyModule(new DummyDriver(config))
@@ -2362,7 +2367,7 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters)
// tracedriver --> coalescer --> tracelogger --> tlram
class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters)
extends LazyModule {
val numLanes = p(SIMTCoreKey).get.nLanes
val numLanes = p(SIMTCoreKey).get.nMemLanes
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
val driver = LazyModule(new MemTraceDriver(config, filename))
@@ -2454,7 +2459,7 @@ class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit
// tracedriver --> coalescer --> tlram
class TLRAMCoalescer(implicit p: Parameters) extends LazyModule {
val numLanes = p(SIMTCoreKey).get.nLanes
val numLanes = p(SIMTCoreKey).get.nMemLanes
val config = DefaultCoalescerConfig.copy(numLanes = numLanes)
val filename = "vecadd.core1.thread4.trace"

View File

@@ -8,7 +8,7 @@ import freechips.rocketchip.subsystem.{BaseSubsystemConfig}
import freechips.rocketchip.devices.tilelink._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import radiance.subsystem.WithSimtLanes
import radiance.subsystem.WithSimtConfig
import freechips.rocketchip.unittest._
//import rocket.VortexFatBankTest
@@ -27,7 +27,7 @@ class WithCoalescingUnitTests extends Config((site, _, _) => {
// Module(new TLRAMCoalescerLoggerTest(filename="sfilter.core1.thread4.trace", timeout=timeout)),
// Module(new TLRAMCoalescerLoggerTest(filename="nearn.core1.thread4.trace", timeout=50000000 * site(TestDurationMultiplier))),
// Module(new TLRAMCoalescerLoggerTest(filename="psort.core1.thread4.trace", timeout=timeout)),
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtLanes(32))),
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtConfig(nMemLanes=32))),
// Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.lane4.trace", timeout=timeout)),
) }
})
@@ -48,12 +48,12 @@ class WithCoalescingUnitSynthesisDummy(nLanes: Int) extends Config((site, _, _)
implicit val p = q
val timeout = 50000 * site(TestDurationMultiplier)
Seq(
Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtLanes(nLanes=4))),
Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtConfig(nMemLanes=4))),
) }
})
class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig)
//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig)
class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nMemLanes=4) ++ new BaseSubsystemConfig)
//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nMemLanes=4) ++ new BaseSubsystemConfig)
// Dummy configs of various sizes for synthesis
class CoalescingSynthesisDummyLane4Config extends Config(new WithCoalescingUnitSynthesisDummy(4) ++ new WithTestDuration(10) ++ new BaseSubsystemConfig)

View File

@@ -66,10 +66,13 @@ class WithFuzzerCores(
})
// `nSrcIds`: number of source IDs for dmem requests on each SIMT lane
class WithSimtLanes(nLanes: Int, nSrcIds: Int = 8) extends Config((site, _, up) => {
class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8)
extends Config((site, _, up) => {
case SIMTCoreKey => {
Some(up(SIMTCoreKey, site).getOrElse(SIMTCoreParams()).copy(
nLanes = nLanes,
nWarps = nWarps,
nCoreLanes = nCoreLanes,
nMemLanes = nMemLanes,
nSrcIds = nSrcIds
))
}
@@ -105,7 +108,7 @@ class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
case CoalescerKey => {
val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match {
case Some(param) => (param.nLanes, param.nSrcIds)
case Some(param) => (param.nMemLanes, param.nSrcIds)
case None => (1,1)
}
@@ -182,4 +185,4 @@ class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16),
})
})
// Parameters of the GPU memory region: `address` and `size` (presumably the
// base address and the length in bytes -- confirm against the users of
// GPUMemory).
// Both defaults are spelled as radix-16 strings WITHOUT a "0x" prefix:
//  - BigInt("0x...", 16) throws NumberFormatException, since 'x' is not a
//    hexadecimal digit;
//  - the Int literal 0x80000000 wraps to -2147483648 before it is widened to
//    BigInt, which would make the default size negative.
case class GPUMemParams(address: BigInt = BigInt("100000000", 16), size: BigInt = BigInt("80000000", 16))
case class GPUMemory() extends Field[Option[GPUMemParams]](None)
case class GPUMemory() extends Field[Option[GPUMemParams]](None)

View File

@@ -60,7 +60,7 @@ class FuzzerTile private (
// val statusNode = BundleBridgeSource(() => new GroundTestStatus)
val (numLanes, numSrcIds) = p(SIMTCoreKey) match {
case Some(param) => (param.nLanes, param.nSrcIds)
case Some(param) => (param.nMemLanes, param.nSrcIds)
case None => {
require(false, "fuzzer requires SIMTCoreKey to be defined")
(0, 0)

View File

@@ -140,10 +140,21 @@ class RadianceTile private (
require(
p(SIMTCoreKey).isDefined,
"SIMTCoreKey not defined; make sure to use WithSimtLanes when using RadianceTile"
"SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile"
)
val numLanes = p(SIMTCoreKey) match {
case Some(simtParam) => simtParam.nLanes
// NOTE: when changing these, remember to change +define+NUM_THREADS/WARPS in
// EXTRA_SIM_PREPROC_DEFINES as well!
val numWarps = p(SIMTCoreKey) match {
case Some(simtParam) => simtParam.nWarps
case None => 4
}
val numCoreLanes = p(SIMTCoreKey) match {
case Some(simtParam) => simtParam.nCoreLanes
case None => 4
}
val numLsuLanes = p(SIMTCoreKey) match {
case Some(simtParam) => simtParam.nMemLanes
case None => 4
}
@@ -170,13 +181,14 @@ class RadianceTile private (
val smemSourceWidth = 4 // FIXME: hardcoded
val numWarps = 4 // TODO: parametrize
// Replicates some of the logic of how Vortex determines the tag width of
// memory requests so that Chisel and Verilog are in agreement on bitwidths.
// See VX_gpu_pkg.sv
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
val UUID_WIDTH = 44
val imemTagWidth = UUID_WIDTH + NW_WIDTH
val numLsuLanes = 4
// see VX_gpu_pkg.sv
val LSUQ_SIZE = 8 * (numLanes / numLsuLanes)
val LSUQ_SIZE = 8 * (numCoreLanes / numLsuLanes)
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
// dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
@@ -764,7 +776,6 @@ class VortexTLAdapter(
io.outReq.bits.corrupt := 0.U
io.inReq.ready := io.outReq.ready
// VortexBundleD <> TLBundleD
// Filtering out write requests is handled inside the wrapper Verilog
io.inResp.valid := io.outResp.valid
io.inResp.bits.opcode := io.outResp.bits.opcode
io.inResp.bits.size := io.outResp.bits.size