Fix RadianceClusterConfig for flashattention

This commit is contained in:
Hansung Kim
2024-11-08 21:20:46 -08:00
parent 60df9c5123
commit 48199b0422

View File

@@ -26,6 +26,7 @@ class WithRadBootROM(address: BigInt = 0x10000, size: Int = 0x10000, hang: BigIn
class VirgoConfig extends RadianceClusterConfig
class VirgoFP16Config extends RadianceFP16ClusterConfig
class VirgoHopperConfig extends Radiance4CFP16ClusterConfig
class VirgoFlashConfig extends RadianceClusterConfig
class VirgoSynConfig extends RadianceClusterSynConfig
class VirgoFP16SynConfig extends RadianceFP16ClusterSynConfig
class VirgoHopperSynConfig extends Radiance4CFP16ClusterSynConfig
@@ -60,12 +61,23 @@ class RadianceFP16ClusterConfig extends Config(
new radiance.subsystem.WithRadianceCluster(0) ++
new RadianceBaseConfig)
class Radiance8B8WFP16ClusterConfig extends Config(
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++
new radiance.subsystem.WithRadianceCores(8, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, useVxCache = false) ++
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 8, numWords = 8) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
new radiance.subsystem.WithRadianceCluster(0) ++
new RadianceBaseConfig)
class Radiance4CFP16ClusterConfig extends Config(
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = true, useVxCache = false) ++
// new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 16,
// memType = radiance.subsystem.TwoReadOneWrite,
// serializeUnaligned = radiance.subsystem.CoreSerialized) ++
// NOTE: Hopper Tensor Core does not work with 16-word config due to the
// address alignment requirement
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
@@ -75,12 +87,12 @@ class Radiance4CFP16ClusterConfig extends Config(
class RadianceClusterConfig extends Config(
// important to keep gemmini tile before RadianceCores to ensure radiance tile id is 0-indexed
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 8, accSizeInKB = 16, tileSize = 8) ++
// new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 8, accSizeInKB = 16, tileSize = 8) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = false, tensorCoreDecoupled = true, useVxCache = false) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = false, tensorCoreDecoupled = false, useVxCache = false) ++
// new radiance.subsystem.WithRadianceFrameBuffer(x"ff018000", 16, 0x8000, x"ff011000", "fb0") ++
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 256 << 10/*KBytes*/, numBanks = 4, numWords = 16,
memType = radiance.subsystem.TwoReadOneWrite,
serializeUnaligned = radiance.subsystem.CoreSerialized) ++
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 256 << 10/*KBytes*/, numBanks = 8, numWords = 8,
// memType = radiance.subsystem.TwoReadOneWrite,
serializeUnaligned = radiance.subsystem.CoreSerialized,
) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
new radiance.subsystem.WithRadianceCluster(0) ++