Merge pull request #1697 from ucb-bar/clock_tap

Add debug clock tap port to all default designs
This commit is contained in:
Jerry Zhao
2024-01-11 11:38:20 -08:00
committed by GitHub
12 changed files with 131 additions and 23 deletions

View File

@@ -33,6 +33,7 @@ class DigitalTop(implicit p: Parameters) extends ChipyardSystem
with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget
with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA
with chipyard.clocking.HasChipyardPRCI // Use Chipyard reset/clock distribution with chipyard.clocking.HasChipyardPRCI // Use Chipyard reset/clock distribution
with chipyard.clocking.CanHaveClockTap // Enables optionally adding a clock tap output port
with fftgenerator.CanHavePeripheryFFT // Enables optionally having an MMIO-based FFT block with fftgenerator.CanHavePeripheryFFT // Enables optionally having an MMIO-based FFT block
with constellation.soc.CanHaveGlobalNoC // Support instantiating a global NoC interconnect with constellation.soc.CanHaveGlobalNoC // Support instantiating a global NoC interconnect
{ {

View File

@@ -0,0 +1,27 @@
package chipyard.clocking
import chisel3._
import org.chipsalliance.cde.config.{Parameters, Field, Config}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.subsystem._
import freechips.rocketchip.util._
import freechips.rocketchip.tile._
import freechips.rocketchip.prci._
// Config key selecting whether CanHaveClockTap generates a clock tap.
// Defaults to true; CanHaveClockTap only builds its sink node and IO when this is true.
case object ClockTapKey extends Field[Boolean](true)
// Mixin for a BaseSubsystem that optionally adds a clock tap: a clock sink hung off the
// subsystem's asyncClockGroupsNode, plus an Output(Clock()) IO that forwards that clock.
// Whether the tap is generated is controlled by ClockTapKey.
trait CanHaveClockTap { this: BaseSubsystem =>
// Rejects configurations in which SubsystemDriveAsyncClockGroupsKey is set.
// NOTE(review): this check runs even when ClockTapKey is false - confirm that is intended.
require(p(SubsystemDriveAsyncClockGroupsKey).isEmpty, "Subsystem asyncClockGroups must be undriven")
// Some(sink node) when ClockTapKey is true, None otherwise.
val clockTapNode = Option.when(p(ClockTapKey)) {
// Single clock sink named "clock_tap"
// (presumably the name that clock-group combining configs match on - verify).
val clockTap = ClockSinkNode(Seq(ClockSinkParameters(name=Some("clock_tap"))))
// Attach the sink to the subsystem's async clock groups through ClockGroup().
clockTap := ClockGroup() := asyncClockGroupsNode
clockTap
}
// When the node exists: inside InModuleBody, create an output clock IO driven by the clock
// of the first bundle in node.in. The result is the wrapped IO, or None without a tap.
val clockTapIO = clockTapNode.map { node => InModuleBody {
val clock_tap = IO(Output(Clock()))
clock_tap := node.in.head._1.clock
clock_tap
}}
}

View File

@@ -2,7 +2,7 @@ package chipyard.clocking
import chisel3._ import chisel3._
import chisel3.util._ import chisel3.util._
import chipyard.iobinders.{OverrideLazyIOBinder, GetSystemParameters, IOCellKey, ClockPort, ResetPort} import chipyard.iobinders._
import freechips.rocketchip.prci._ import freechips.rocketchip.prci._
import freechips.rocketchip.diplomacy._ import freechips.rocketchip.diplomacy._
import freechips.rocketchip.subsystem._ import freechips.rocketchip.subsystem._
@@ -14,14 +14,17 @@ import barstools.iocell.chisel._
// blocks, which allow memory-mapped control of clock division, and clock muxing // blocks, which allow memory-mapped control of clock division, and clock muxing
// between the FakePLL and the slow off-chip clock // between the FakePLL and the slow off-chip clock
// Note: This will not simulate properly with firesim // Note: This will not simulate properly with firesim
class WithPLLSelectorDividerClockGenerator extends OverrideLazyIOBinder({ // Unsetting enable will prevent the divider/selector from actually modifying the clock,
// while preserving the address map. Unsetting enable should only be done for RTL
// simulators (Verilator) which do not model reset properly
class WithPLLSelectorDividerClockGenerator(enable: Boolean = true) extends OverrideLazyIOBinder({
(system: HasChipyardPRCI) => { (system: HasChipyardPRCI) => {
// Connect the implicit clock // Connect the implicit clock
implicit val p = GetSystemParameters(system) implicit val p = GetSystemParameters(system)
val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(system.prciParams.slaveWhere) val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(system.prciParams.slaveWhere)
val baseAddress = system.prciParams.baseAddress val baseAddress = system.prciParams.baseAddress
val clockDivider = system.prci_ctrl_domain { LazyModule(new TLClockDivider (baseAddress + 0x20000, tlbus.beatBytes)) } val clockDivider = system.prci_ctrl_domain { LazyModule(new TLClockDivider (baseAddress + 0x20000, tlbus.beatBytes, enable=enable)) }
val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes)) } val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes, enable=enable)) }
val pllCtrl = system.prci_ctrl_domain { LazyModule(new FakePLLCtrl (baseAddress + 0x40000, tlbus.beatBytes)) } val pllCtrl = system.prci_ctrl_domain { LazyModule(new FakePLLCtrl (baseAddress + 0x40000, tlbus.beatBytes)) }
clockDivider.tlNode := system.prci_ctrl_domain { TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := system.prci_ctrl_bus.get } clockDivider.tlNode := system.prci_ctrl_domain { TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := system.prci_ctrl_bus.get }
@@ -98,3 +101,12 @@ class WithPassthroughClockGenerator extends OverrideLazyIOBinder({
} }
} }
}) })
// IOBinder for systems mixing in CanHaveClockTap.
// If the system produced a clock tap IO, it is passed to IOCell.generateIOFromSignal under
// the name "clock_tap" and the resulting IO is exposed as a ClockTapPort together with the
// generated cell(s). When there is no tap, no ports and no cells are contributed.
class WithClockTapIOCells extends OverrideIOBinder({
  (system: CanHaveClockTap) => system.clockTapIO match {
    case Some(tap) =>
      val (clock_tap_io, clock_tap_cell) = IOCell.generateIOFromSignal(tap.getWrappedValue, "clock_tap")
      (Seq(ClockTapPort(() => clock_tap_io)), clock_tap_cell)
    case None =>
      (Nil, Nil)
  }
})

View File

@@ -15,11 +15,27 @@ import testchipip.clocking._
// This module adds a TileLink memory-mapped clock divider to the clock graph // This module adds a TileLink memory-mapped clock divider to the clock graph
// The output clock/reset pairs from this module should be synchronized later // The output clock/reset pairs from this module should be synchronized later
class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8)(implicit p: Parameters) extends LazyModule { // If enable is unset, this will not divide the clock
// DO NOT unset enable for VLSI or prototyping flows. The disable feature is a workaround for
// some RTL simulators which do not simulate the reset synchronization properly
class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8, enable: Boolean = true)(implicit p: Parameters) extends LazyModule {
val device = new SimpleDevice(s"clk-div-ctrl", Nil) val device = new SimpleDevice(s"clk-div-ctrl", Nil)
val clockNode = ClockGroupIdentityNode() val clockNode = ClockGroupIdentityNode()
val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes) val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes)
if (!enable) println(Console.RED + s"""
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
WARNING:
YOU ARE USING THE TLCLOCKDIVIDER IN
"DISABLED" MODE. THIS SHOULD ONLY BE DONE
FOR RTL SIMULATION
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""" + Console.RESET)
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
require (clockNode.out.size == 1) require (clockNode.out.size == 1)
val sources = clockNode.in.head._1.member.data.toSeq val sources = clockNode.in.head._1.member.data.toSeq
@@ -45,13 +61,21 @@ class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8)(implicit
// by setting divisor=0. The divisor signal into the ClockDividerOrPass is synchronized internally // by setting divisor=0. The divisor signal into the ClockDividerOrPass is synchronized internally
divider.io.divisor := Mux(busReset.asBool, 0.U, reg.io.q) divider.io.divisor := Mux(busReset.asBool, 0.U, reg.io.q)
divider.io.resetAsync := ResetStretcher(sources(i).clock, asyncReset, 20).asAsyncReset divider.io.resetAsync := ResetStretcher(sources(i).clock, asyncReset, 20).asAsyncReset
sinks(i)._2.clock := divider.io.clockOut
// Note this is not synchronized to the output clock, which takes time to appear if (enable) {
// so this is still asyncreset sinks(i)._2.clock := divider.io.clockOut
// Stretch the reset for 40 cycles, to give enough time to reset any downstream
// digital logic // Note this is not synchronized to the output clock, which takes time to appear
sinks(i)._2.reset := ResetStretcher(sources(i).clock, asyncReset, 40).asAsyncReset // so this is still asyncreset
// Stretch the reset for 40 cycles, to give enough time to reset any downstream
// digital logic
sinks(i)._2.reset := ResetStretcher(sources(i).clock, asyncReset, 40).asAsyncReset
} else {
// WARNING: THIS IS FOR RTL SIMULATION ONLY
sinks(i)._2.clock := sources(i).clock
sinks(i)._2.reset := sources(i).reset
}
reg reg
} }

View File

@@ -21,12 +21,30 @@ case class ClockSelNode()(implicit valName: ValName)
// This module adds a TileLink memory-mapped clock mux for each downstream clock domain // This module adds a TileLink memory-mapped clock mux for each downstream clock domain
// in the clock graph. The output clock/reset should be synchronized downstream // in the clock graph. The output clock/reset should be synchronized downstream
class TLClockSelector(address: BigInt, beatBytes: Int)(implicit p: Parameters) extends LazyModule { // If enable is unset, this will always pass through the 0'th clock
// DO NOT unset enable for VLSI or prototyping flows. The disable feature is a workaround for
// some RTL simulators which do not simulate the reset synchronization properly
class TLClockSelector(address: BigInt, beatBytes: Int, enable: Boolean = true)(implicit p: Parameters) extends LazyModule {
val device = new SimpleDevice("clk-sel-ctrl", Nil) val device = new SimpleDevice("clk-sel-ctrl", Nil)
val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes) val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes)
val clockNode = ClockSelNode() val clockNode = ClockSelNode()
if (!enable) println(Console.RED + s"""
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
WARNING:
YOU ARE USING THE TLCLOCKSELECTOR IN
"DISABLED" MODE. THIS SHOULD ONLY BE DONE
FOR RTL SIMULATION
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""" + Console.RESET)
lazy val module = new LazyModuleImp(this) { lazy val module = new LazyModuleImp(this) {
val asyncReset = clockNode.in.map(_._1).map(_.reset).toSeq(0) val asyncReset = clockNode.in.map(_._1).map(_.reset).toSeq(0)
val clocks = clockNode.in.map(_._1).map(_.clock) val clocks = clockNode.in.map(_._1).map(_.clock)
@@ -43,10 +61,15 @@ class TLClockSelector(address: BigInt, beatBytes: Int)(implicit p: Parameters) e
val mux = ClockMutexMux(clocks).suggestName(s"${sinkName}_clkmux") val mux = ClockMutexMux(clocks).suggestName(s"${sinkName}_clkmux")
mux.io.sel := sel mux.io.sel := sel
mux.io.resetAsync := asyncReset.asAsyncReset mux.io.resetAsync := asyncReset.asAsyncReset
sinks(i).clock := mux.io.clockOut if (enable) {
// Stretch the reset for 20 cycles, to give time to reset any downstream digital logic sinks(i).clock := mux.io.clockOut
sinks(i).reset := ResetStretcher(clocks(0), asyncReset, 20).asAsyncReset // Stretch the reset for 20 cycles, to give time to reset any downstream digital logic
sinks(i).reset := ResetStretcher(clocks(0), asyncReset, 20).asAsyncReset
} else {
// WARNING: THIS IS FOR RTL SIMULATION ONLY
sinks(i).clock := clocks(0)
sinks(i).reset := asyncReset
}
reg reg
} }
tlNode.regmap((0 until sinks.size).map { i => tlNode.regmap((0 until sinks.size).map { i =>

View File

@@ -51,9 +51,10 @@ class AbstractConfig extends Config(
new chipyard.iobinders.WithUARTTSIPunchthrough ++ new chipyard.iobinders.WithUARTTSIPunchthrough ++
new chipyard.iobinders.WithNMITiedOff ++ new chipyard.iobinders.WithNMITiedOff ++
// By default, punch out IOs to the Harness new chipyard.clocking.WithClockTapIOCells ++ // Default generate a clock tapio
new chipyard.clocking.WithPassthroughClockGenerator ++ new chipyard.clocking.WithPassthroughClockGenerator ++ // Default punch out IOs to the Harness
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "mbus", "pbus", "fbus", "cbus", "obus", "implicit"), Seq("tile"))) ++ new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", // Default merge all the bus clocks
Seq("sbus", "mbus", "pbus", "fbus", "cbus", "obus", "implicit", "clock_tap"), Seq("tile"))) ++
new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Default 500 MHz pbus new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Default 500 MHz pbus
new chipyard.config.WithControlBusFrequency(500.0) ++ // Default 500 MHz cbus new chipyard.config.WithControlBusFrequency(500.0) ++ // Default 500 MHz cbus
new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Default 500 MHz mbus new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Default 500 MHz mbus

View File

@@ -110,5 +110,9 @@ class TetheredChipLikeRocketConfig extends Config(
class VerilatorCITetheredChipLikeRocketConfig extends Config( class VerilatorCITetheredChipLikeRocketConfig extends Config(
new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++ // use absolute freqs for sims in the harness new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++ // use absolute freqs for sims in the harness
new chipyard.harness.WithMultiChipSerialTL(0, 1) ++ // connect the serial-tl ports of the chips together new chipyard.harness.WithMultiChipSerialTL(0, 1) ++ // connect the serial-tl ports of the chips together
new chipyard.harness.WithMultiChip(0, new chipyard.config.WithNoResetSynchronizers ++ new ChipLikeRocketConfig) ++ new chipyard.harness.WithMultiChip(0, // These fragments remove all troublesome
new chipyard.clocking.WithPLLSelectorDividerClockGenerator(enable=false) ++ // clocking features from the design
new chipyard.iobinders.WithDebugIOCells(syncReset = false) ++
new chipyard.config.WithNoResetSynchronizers ++
new ChipLikeRocketConfig) ++
new chipyard.harness.WithMultiChip(1, new ChipBringupHostConfig)) new chipyard.harness.WithMultiChip(1, new ChipBringupHostConfig))

View File

@@ -63,7 +63,7 @@ class MulticlockRocketConfig extends Config(
new freechips.rocketchip.subsystem.WithNBigCores(1) ++ new freechips.rocketchip.subsystem.WithNBigCores(1) ++
// Frequency specifications // Frequency specifications
new chipyard.config.WithTileFrequency(1000.0) ++ // Matches the maximum frequency of U540 new chipyard.config.WithTileFrequency(1000.0) ++ // Matches the maximum frequency of U540
new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit"), Nil), new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit", "clock_tap"), Nil),
("periphery", Seq("pbus", "fbus"), Nil)) ++ ("periphery", Seq("pbus", "fbus"), Nil)) ++
new chipyard.config.WithSystemBusFrequency(500.0) ++ // Matches the maximum frequency of U540 new chipyard.config.WithSystemBusFrequency(500.0) ++ // Matches the maximum frequency of U540
new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Matches the maximum frequency of U540 new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Matches the maximum frequency of U540

View File

@@ -111,14 +111,22 @@ class WithOffchipBusFrequency(freqMHz: Double) extends Config((site, here, up) =
class WithRationalMemoryBusCrossing extends WithSbusToMbusCrossingType(RationalCrossing(Symmetric)) class WithRationalMemoryBusCrossing extends WithSbusToMbusCrossingType(RationalCrossing(Symmetric))
class WithAsynchrousMemoryBusCrossing extends WithSbusToMbusCrossingType(AsynchronousCrossing()) class WithAsynchrousMemoryBusCrossing extends WithSbusToMbusCrossingType(AsynchronousCrossing())
// Remove the tile clock gaters in this system
class WithNoTileClockGaters extends Config((site, here, up) => { class WithNoTileClockGaters extends Config((site, here, up) => {
case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileClockGating = false) case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileClockGating = false)
}) })
// Remove the tile reset control blocks in this system
class WithNoTileResetSetters extends Config((site, here, up) => { class WithNoTileResetSetters extends Config((site, here, up) => {
case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileResetSetting = false) case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileResetSetting = false)
}) })
// Remove the global reset synchronizers in this system
class WithNoResetSynchronizers extends Config((site, here, up) => { class WithNoResetSynchronizers extends Config((site, here, up) => {
case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableResetSynchronizers = false) case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableResetSynchronizers = false)
}) })
// Remove any ClockTap ports in this system.
// Overrides ClockTapKey to false; none of the site/here/up views are consulted.
class WithNoClockTap extends Config((_, _, _) => {
  case ClockTapKey => false
})

View File

@@ -285,7 +285,9 @@ class JTAGChipIO extends Bundle {
val TDO = Output(Bool()) val TDO = Output(Bool())
} }
class WithDebugIOCells extends OverrideLazyIOBinder({ // WARNING: Don't disable syncReset unless you are trying to
// get around bugs in RTL simulators
class WithDebugIOCells(syncReset: Boolean = true) extends OverrideLazyIOBinder({
(system: HasPeripheryDebug) => { (system: HasPeripheryDebug) => {
implicit val p = GetSystemParameters(system) implicit val p = GetSystemParameters(system)
val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(p(ExportDebug).slaveWhere) val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(p(ExportDebug).slaveWhere)
@@ -309,7 +311,7 @@ class WithDebugIOCells extends OverrideLazyIOBinder({
d.disableDebug.foreach { d => d := false.B } d.disableDebug.foreach { d => d := false.B }
// Drive JTAG on-chip IOs // Drive JTAG on-chip IOs
d.systemjtag.map { j => d.systemjtag.map { j =>
j.reset := ResetCatchAndSync(j.jtag.TCK, clockBundle.reset.asBool) j.reset := (if (syncReset) ResetCatchAndSync(j.jtag.TCK, clockBundle.reset.asBool) else clockBundle.reset.asBool)
j.mfr_id := p(JtagDTMKey).idcodeManufId.U(11.W) j.mfr_id := p(JtagDTMKey).idcodeManufId.U(11.W)
j.part_number := p(JtagDTMKey).idcodePartNum.U(16.W) j.part_number := p(JtagDTMKey).idcodePartNum.U(16.W)
j.version := p(JtagDTMKey).idcodeVersion.U(4.W) j.version := p(JtagDTMKey).idcodeVersion.U(4.W)

View File

@@ -90,6 +90,9 @@ case class CustomBootPort (val getIO: () => Bool)
case class ClockPort (val getIO: () => Clock, val freqMHz: Double) case class ClockPort (val getIO: () => Clock, val freqMHz: Double)
extends Port[Clock] extends Port[Clock]
// Port describing a clock tap output; getIO is a thunk returning the tap's Clock IO.
case class ClockTapPort(getIO: () => Clock) extends Port[Clock]
case class ResetPort (val getIO: () => AsyncReset) case class ResetPort (val getIO: () => AsyncReset)
extends Port[Reset] extends Port[Reset]

View File

@@ -85,6 +85,7 @@ class WithMinimalFireSimDesignTweaks extends Config(
new chipyard.harness.WithHarnessBinderClockFreqMHz(1000.0) ++ new chipyard.harness.WithHarnessBinderClockFreqMHz(1000.0) ++
new chipyard.harness.WithClockFromHarness ++ new chipyard.harness.WithClockFromHarness ++
new chipyard.harness.WithResetFromHarness ++ new chipyard.harness.WithResetFromHarness ++
new chipyard.config.WithNoClockTap ++
new chipyard.clocking.WithPassthroughClockGenerator ++ new chipyard.clocking.WithPassthroughClockGenerator ++
// Required*: When using FireSim-as-top to provide a correct path to the target bootrom source // Required*: When using FireSim-as-top to provide a correct path to the target bootrom source
new WithBootROM ++ new WithBootROM ++
@@ -99,6 +100,8 @@ class WithMinimalFireSimDesignTweaks extends Config(
// Non-frequency tweaks that are generally applied to all firesim configs // Non-frequency tweaks that are generally applied to all firesim configs
class WithFireSimDesignTweaks extends Config( class WithFireSimDesignTweaks extends Config(
new WithMinimalFireSimDesignTweaks ++ new WithMinimalFireSimDesignTweaks ++
// Required: Remove the debug clock tap, since the tap breaks compilation of target-level sim in FireSim
new chipyard.config.WithNoClockTap ++
// Required: Bake in the default FASED memory model // Required: Bake in the default FASED memory model
new WithDefaultMemModel ++ new WithDefaultMemModel ++
// Optional: reduce the width of the Serial TL interface // Optional: reduce the width of the Serial TL interface