Distinguish LSU lanes from SIMD lanes and elaborate tag width logic
This commit is contained in:
@@ -45,11 +45,11 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle
|
||||
val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32))
|
||||
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
||||
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32))
|
||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
||||
val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32))
|
||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32)))
|
||||
})) else None
|
||||
@@ -61,40 +61,40 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle
|
||||
}) else None
|
||||
|
||||
// Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
|
||||
// everything needs to be 1-D flattened UInt with their widths configurable by numLanes.
|
||||
// everything needs to be 1-D flattened UInt with their widths configurable by numLsuLanes.
|
||||
//
|
||||
// FIXME: hardcoded bitwidths
|
||||
val dmem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
||||
val dmem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
||||
val dmem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
||||
val dmem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
||||
val dmem_a_bits_source = Output(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
||||
val dmem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
||||
val dmem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
||||
val dmem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
||||
val dmem_a_ready = Input(UInt((tile.numLsuLanes * 1).W))
|
||||
val dmem_a_valid = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
val dmem_a_bits_opcode = Output(UInt((tile.numLsuLanes * 3).W))
|
||||
val dmem_a_bits_size = Output(UInt((tile.numLsuLanes * 4).W))
|
||||
val dmem_a_bits_source = Output(UInt((tile.numLsuLanes * tile.dmemTagWidth).W))
|
||||
val dmem_a_bits_address = Output(UInt((tile.numLsuLanes * 32).W))
|
||||
val dmem_a_bits_mask = Output(UInt((tile.numLsuLanes * 4).W))
|
||||
val dmem_a_bits_data = Output(UInt((tile.numLsuLanes * 32).W))
|
||||
|
||||
val dmem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
||||
val dmem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
||||
val dmem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
||||
val dmem_d_bits_source = Input(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
||||
val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
||||
val dmem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
||||
val dmem_d_valid = Input(UInt((tile.numLsuLanes * 1).W))
|
||||
val dmem_d_bits_opcode = Input(UInt((tile.numLsuLanes * 3).W))
|
||||
val dmem_d_bits_size = Input(UInt((tile.numLsuLanes * 4).W))
|
||||
val dmem_d_bits_source = Input(UInt((tile.numLsuLanes * tile.dmemTagWidth).W))
|
||||
val dmem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||
val dmem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
|
||||
val smem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
||||
val smem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
||||
val smem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
||||
val smem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
||||
val smem_a_bits_source = Output(UInt((tile.numLanes * tile.smemTagWidth).W))
|
||||
val smem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
||||
val smem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
||||
val smem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
||||
val smem_a_ready = Input(UInt((tile.numLsuLanes * 1).W))
|
||||
val smem_a_valid = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
val smem_a_bits_opcode = Output(UInt((tile.numLsuLanes * 3).W))
|
||||
val smem_a_bits_size = Output(UInt((tile.numLsuLanes * 4).W))
|
||||
val smem_a_bits_source = Output(UInt((tile.numLsuLanes * tile.smemTagWidth).W))
|
||||
val smem_a_bits_address = Output(UInt((tile.numLsuLanes * 32).W))
|
||||
val smem_a_bits_mask = Output(UInt((tile.numLsuLanes * 4).W))
|
||||
val smem_a_bits_data = Output(UInt((tile.numLsuLanes * 32).W))
|
||||
|
||||
val smem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
||||
val smem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
||||
val smem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
||||
val smem_d_bits_source = Input(UInt((tile.numLanes * tile.smemTagWidth).W))
|
||||
val smem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
||||
val smem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
||||
val smem_d_valid = Input(UInt((tile.numLsuLanes * 1).W))
|
||||
val smem_d_bits_opcode = Input(UInt((tile.numLsuLanes * 3).W))
|
||||
val smem_d_bits_size = Input(UInt((tile.numLsuLanes * 4).W))
|
||||
val smem_d_bits_source = Input(UInt((tile.numLsuLanes * tile.smemTagWidth).W))
|
||||
val smem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||
|
||||
// val fpu = Flipped(new FPUCoreIO())
|
||||
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
||||
@@ -114,7 +114,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
||||
"CORE_ID" -> tile.tileParams.hartId,
|
||||
// TODO: can we get this as a parameter?
|
||||
"BOOTROM_HANG100" -> 0x10100,
|
||||
"NUM_THREADS" -> tile.numLanes
|
||||
"NUM_THREADS" -> tile.numLsuLanes
|
||||
)
|
||||
)
|
||||
with HasBlackBoxResource {
|
||||
|
||||
@@ -189,12 +189,14 @@ class VortexTile private (
|
||||
|
||||
val smemSourceWidth = 4 // FIXME: hardcoded
|
||||
|
||||
// TODO: parametrize
|
||||
val numWarps = 4
|
||||
val numWarps = 4 // TODO: parametrize
|
||||
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
|
||||
val UUID_WIDTH = 44
|
||||
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||
val LSUQ_TAG_BITS = 4
|
||||
val numLsuLanes = 4
|
||||
// see VX_gpu_pkg.sv
|
||||
val LSUQ_SIZE = 8 * (numLanes / numLsuLanes)
|
||||
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
||||
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||
// dmem and smem share the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||
val smemTagWidth = dmemTagWidth
|
||||
@@ -218,7 +220,7 @@ class VortexTile private (
|
||||
)
|
||||
}
|
||||
|
||||
val dmemNodes = Seq.tabulate(numLanes) { i =>
|
||||
val dmemNodes = Seq.tabulate(numLsuLanes) { i =>
|
||||
TLClientNode(
|
||||
Seq(
|
||||
TLMasterPortParameters.v1(
|
||||
@@ -241,7 +243,7 @@ class VortexTile private (
|
||||
)
|
||||
}
|
||||
|
||||
val smemNodes = Seq.tabulate(numLanes) { i =>
|
||||
val smemNodes = Seq.tabulate(numLsuLanes) { i =>
|
||||
TLClientNode(
|
||||
Seq(
|
||||
TLMasterPortParameters.v1(
|
||||
@@ -337,12 +339,12 @@ class VortexTile private (
|
||||
//
|
||||
// Instantiate the same number of banks as there are LSU lanes.
|
||||
// TODO: parametrize
|
||||
val smemBanks = Seq.tabulate(numLanes) { bankId =>
|
||||
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||
// Banked-by-word (4 bytes)
|
||||
// base for bank 1: ff...000000|01|00
|
||||
// mask for bank 1: 00...111111|00|11
|
||||
val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||
val mask = 0x00ffffffL ^ ((numLanes - 1) * 4 /*wordSize*/ )
|
||||
val mask = 0x00ffffffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
||||
}
|
||||
// smem lanes-to-banks crossbar
|
||||
@@ -531,7 +533,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
||||
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||
// up some area
|
||||
val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
|
||||
val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
||||
val dmemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
|
||||
Module(
|
||||
new VortexTLAdapter(
|
||||
outer.dmemSourceWidth,
|
||||
@@ -565,7 +567,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
||||
new RRArbiter(
|
||||
// FIXME: should really be source on D channel
|
||||
new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType,
|
||||
outer.numLanes
|
||||
outer.numLsuLanes
|
||||
)
|
||||
)
|
||||
arb.io.out.ready := true.B
|
||||
@@ -574,7 +576,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
||||
arbIn.valid := vxDmem.valid
|
||||
arbIn.bits := vxDmem.bits.source
|
||||
}
|
||||
val matchingSources = Wire(UInt(outer.numLanes.W))
|
||||
val matchingSources = Wire(UInt(outer.numLsuLanes.W))
|
||||
matchingSources := dmemBundles
|
||||
.map(b =>
|
||||
// If there is no valid response pending across all lanes,
|
||||
@@ -609,7 +611,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
||||
core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt
|
||||
|
||||
// override response channel with matchingSources
|
||||
val dmem_d_valid_vec = Wire(Vec(outer.numLanes, Bool()))
|
||||
val dmem_d_valid_vec = Wire(Vec(outer.numLsuLanes, Bool()))
|
||||
dmemTLAdapters.zipWithIndex.foreach {
|
||||
case (tlAdapter, i) =>
|
||||
dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i)
|
||||
@@ -632,7 +634,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
||||
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||
// up some area
|
||||
val smemTLBundles = outer.smemNodes.map(_.out.head._1)
|
||||
val smemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
||||
val smemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
|
||||
Module(
|
||||
new VortexTLAdapter(
|
||||
outer.smemSourceWidth,
|
||||
|
||||
Reference in New Issue
Block a user