tensor: Fix wrong addressGen that used bits not bytes

This commit is contained in:
Hansung Kim
2024-10-28 22:27:13 -07:00
parent 1ae1965580
commit daabeb03ab

View File

@@ -202,8 +202,9 @@ class TensorCoreDecoupled(
// Address generation
//
def addressGen(base: UInt, set: UInt, index: UInt): UInt = {
// note that both A and B are K-major to facilitate bank conflict-free SMEM
// accesses, so that below code applies to both.
// A is assumed to be block-wise M-major, and B block-wise N-major, to
// facilitate bank conflict-free SMEM accesses. With these layouts, the
// same code below works for both A and B.
//
// a "block" is the 4*8 byte-sized contiguous memory that can be read in
// one SMEM request. The A and B matrices are assumed to be stored in
@@ -211,8 +212,7 @@ class TensorCoreDecoupled(
val blockRow = set
val blockCol = index
val blockIndex = (blockRow << indexBits) + blockCol
val blockSize = numLanes * laneWidth
require(blockSize == memWidth)
val blockSize = numLanes * (laneWidth / 8/*bits*/)
val blockSizeBits = log2Ceil(blockSize)
val byteOffset = blockIndex << blockSizeBits
base + byteOffset