Fix RadianceClusterConfig for flashattention

This commit is contained in:
Hansung Kim
2024-11-08 21:20:46 -08:00
parent 60df9c5123
commit 48199b0422

View File

@@ -26,6 +26,7 @@ class WithRadBootROM(address: BigInt = 0x10000, size: Int = 0x10000, hang: BigIn
class VirgoConfig extends RadianceClusterConfig class VirgoConfig extends RadianceClusterConfig
class VirgoFP16Config extends RadianceFP16ClusterConfig class VirgoFP16Config extends RadianceFP16ClusterConfig
class VirgoHopperConfig extends Radiance4CFP16ClusterConfig class VirgoHopperConfig extends Radiance4CFP16ClusterConfig
// Alias of RadianceClusterConfig: declares no body, so it inherits that
// configuration unchanged.
// NOTE(review): the name suggests this is the config intended for
// flash-attention workloads — confirm against the users of this class.
class VirgoFlashConfig extends RadianceClusterConfig
class VirgoSynConfig extends RadianceClusterSynConfig class VirgoSynConfig extends RadianceClusterSynConfig
class VirgoFP16SynConfig extends RadianceFP16ClusterSynConfig class VirgoFP16SynConfig extends RadianceFP16ClusterSynConfig
class VirgoHopperSynConfig extends Radiance4CFP16ClusterSynConfig class VirgoHopperSynConfig extends Radiance4CFP16ClusterSynConfig
@@ -60,12 +61,23 @@ class RadianceFP16ClusterConfig extends Config(
new radiance.subsystem.WithRadianceCluster(0) ++ new radiance.subsystem.WithRadianceCluster(0) ++
new RadianceBaseConfig) new RadianceBaseConfig)
// FP16 single-cluster configuration with 8 Radiance cores and a shared memory
// laid out as 8 banks x 8 words (presumably what "8B8W" in the name stands
// for — TODO confirm). Built by chaining config fragments with `++` on top of
// RadianceBaseConfig.
class Radiance8B8WFP16ClusterConfig extends Config(
// Gemmini fragment is listed before the cores fragment, the same order that
// RadianceClusterConfig documents as needed to keep radiance tile ids
// 0-indexed — presumably the same constraint applies here; keep this order.
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++
// 8 cores in cluster 0 with the FP16 tensor core enabled, non-decoupled
// (tensorCoreDecoupled = false), and useVxCache disabled.
new radiance.subsystem.WithRadianceCores(8, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = false, useVxCache = false) ++
// size = 128 << 10 = 131072, i.e. 128 KiB assuming `size` is in bytes
// (cf. the /*KBytes*/ note on the same parameter in RadianceClusterConfig).
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 8, numWords = 8) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
new radiance.subsystem.WithRadianceCluster(0) ++
new RadianceBaseConfig)
class Radiance4CFP16ClusterConfig extends Config( class Radiance4CFP16ClusterConfig extends Config(
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++ new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 16, accSizeInKB = 32, tileSize = (8, 4, 8), dataType = RadianceGemminiDataType.FP16) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = true, useVxCache = false) ++ new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = true, tensorCoreDecoupled = true, useVxCache = false) ++
// new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 16, // new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 16,
// memType = radiance.subsystem.TwoReadOneWrite, // memType = radiance.subsystem.TwoReadOneWrite,
// serializeUnaligned = radiance.subsystem.CoreSerialized) ++ // serializeUnaligned = radiance.subsystem.CoreSerialized) ++
// NOTE: Hopper Tensor Core does not work with 16-word config due to the
// address alignment requirement
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++ new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 128 << 10, numBanks = 4, numWords = 8) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++ new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++ new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
@@ -75,12 +87,12 @@ class Radiance4CFP16ClusterConfig extends Config(
class RadianceClusterConfig extends Config( class RadianceClusterConfig extends Config(
// important to keep gemmini tile before RadianceCores to ensure radiance tile id is 0-indexed // important to keep gemmini tile before RadianceCores to ensure radiance tile id is 0-indexed
new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 8, accSizeInKB = 16, tileSize = 8) ++ new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 8, accSizeInKB = 16, tileSize = 8) ++
// new radiance.subsystem.WithRadianceGemmini(location = InCluster(0), dim = 8, accSizeInKB = 16, tileSize = 8) ++ new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = false, tensorCoreDecoupled = false, useVxCache = false) ++
new radiance.subsystem.WithRadianceCores(4, location = InCluster(0), tensorCoreFP16 = false, tensorCoreDecoupled = true, useVxCache = false) ++
// new radiance.subsystem.WithRadianceFrameBuffer(x"ff018000", 16, 0x8000, x"ff011000", "fb0") ++ // new radiance.subsystem.WithRadianceFrameBuffer(x"ff018000", 16, 0x8000, x"ff011000", "fb0") ++
new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 256 << 10/*KBytes*/, numBanks = 4, numWords = 16, new radiance.subsystem.WithRadianceSharedMem(address = x"ff000000", size = 256 << 10/*KBytes*/, numBanks = 8, numWords = 8,
memType = radiance.subsystem.TwoReadOneWrite, // memType = radiance.subsystem.TwoReadOneWrite,
serializeUnaligned = radiance.subsystem.CoreSerialized) ++ serializeUnaligned = radiance.subsystem.CoreSerialized,
) ++
new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++ new radiance.subsystem.WithCoalescer(nNewSrcIds = 16) ++
new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++ new radiance.subsystem.WithVortexL1Banks(nBanks = 8) ++
new radiance.subsystem.WithRadianceCluster(0) ++ new radiance.subsystem.WithRadianceCluster(0) ++