Distinguish LSU lanes from SIMD lanes and elaborate tag width logic
This commit is contained in:
@@ -45,11 +45,11 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle
|
|||||||
val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32))
|
val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32))
|
||||||
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32)))
|
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32)))
|
||||||
})) else None
|
})) else None
|
||||||
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32))
|
// val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32))
|
||||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32)))
|
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32)))
|
||||||
})) else None
|
})) else None
|
||||||
val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLsuLanes, new Bundle {
|
||||||
// val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32))
|
// val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32))
|
||||||
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32)))
|
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32)))
|
||||||
})) else None
|
})) else None
|
||||||
@@ -61,40 +61,40 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle
|
|||||||
}) else None
|
}) else None
|
||||||
|
|
||||||
// Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
|
// Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
|
||||||
// everything needs to be 1-D flattened UInt with their widths configurable by numLanes.
|
// everything needs to be 1-D flattened UInt with their widths configurable by numLsuLanes.
|
||||||
//
|
//
|
||||||
// FIXME: hardcoded bitwidths
|
// FIXME: hardcoded bitwidths
|
||||||
val dmem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
val dmem_a_ready = Input(UInt((tile.numLsuLanes * 1).W))
|
||||||
val dmem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
val dmem_a_valid = Output(UInt((tile.numLsuLanes * 1).W))
|
||||||
val dmem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
val dmem_a_bits_opcode = Output(UInt((tile.numLsuLanes * 3).W))
|
||||||
val dmem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
val dmem_a_bits_size = Output(UInt((tile.numLsuLanes * 4).W))
|
||||||
val dmem_a_bits_source = Output(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
val dmem_a_bits_source = Output(UInt((tile.numLsuLanes * tile.dmemTagWidth).W))
|
||||||
val dmem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
val dmem_a_bits_address = Output(UInt((tile.numLsuLanes * 32).W))
|
||||||
val dmem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
val dmem_a_bits_mask = Output(UInt((tile.numLsuLanes * 4).W))
|
||||||
val dmem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
val dmem_a_bits_data = Output(UInt((tile.numLsuLanes * 32).W))
|
||||||
|
|
||||||
val dmem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
val dmem_d_valid = Input(UInt((tile.numLsuLanes * 1).W))
|
||||||
val dmem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
val dmem_d_bits_opcode = Input(UInt((tile.numLsuLanes * 3).W))
|
||||||
val dmem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
val dmem_d_bits_size = Input(UInt((tile.numLsuLanes * 4).W))
|
||||||
val dmem_d_bits_source = Input(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
val dmem_d_bits_source = Input(UInt((tile.numLsuLanes * tile.dmemTagWidth).W))
|
||||||
val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
val dmem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||||
val dmem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
val dmem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||||
|
|
||||||
val smem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
val smem_a_ready = Input(UInt((tile.numLsuLanes * 1).W))
|
||||||
val smem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
val smem_a_valid = Output(UInt((tile.numLsuLanes * 1).W))
|
||||||
val smem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
val smem_a_bits_opcode = Output(UInt((tile.numLsuLanes * 3).W))
|
||||||
val smem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
val smem_a_bits_size = Output(UInt((tile.numLsuLanes * 4).W))
|
||||||
val smem_a_bits_source = Output(UInt((tile.numLanes * tile.smemTagWidth).W))
|
val smem_a_bits_source = Output(UInt((tile.numLsuLanes * tile.smemTagWidth).W))
|
||||||
val smem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
val smem_a_bits_address = Output(UInt((tile.numLsuLanes * 32).W))
|
||||||
val smem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
val smem_a_bits_mask = Output(UInt((tile.numLsuLanes * 4).W))
|
||||||
val smem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
val smem_a_bits_data = Output(UInt((tile.numLsuLanes * 32).W))
|
||||||
|
|
||||||
val smem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
val smem_d_valid = Input(UInt((tile.numLsuLanes * 1).W))
|
||||||
val smem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
val smem_d_bits_opcode = Input(UInt((tile.numLsuLanes * 3).W))
|
||||||
val smem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
val smem_d_bits_size = Input(UInt((tile.numLsuLanes * 4).W))
|
||||||
val smem_d_bits_source = Input(UInt((tile.numLanes * tile.smemTagWidth).W))
|
val smem_d_bits_source = Input(UInt((tile.numLsuLanes * tile.smemTagWidth).W))
|
||||||
val smem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
val smem_d_bits_data = Input(UInt((tile.numLsuLanes * 32).W))
|
||||||
val smem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
val smem_d_ready = Output(UInt((tile.numLsuLanes * 1).W))
|
||||||
|
|
||||||
// val fpu = Flipped(new FPUCoreIO())
|
// val fpu = Flipped(new FPUCoreIO())
|
||||||
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
||||||
@@ -114,7 +114,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
"CORE_ID" -> tile.tileParams.hartId,
|
"CORE_ID" -> tile.tileParams.hartId,
|
||||||
// TODO: can we get this as a parameter?
|
// TODO: can we get this as a parameter?
|
||||||
"BOOTROM_HANG100" -> 0x10100,
|
"BOOTROM_HANG100" -> 0x10100,
|
||||||
"NUM_THREADS" -> tile.numLanes
|
"NUM_THREADS" -> tile.numLsuLanes
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
with HasBlackBoxResource {
|
with HasBlackBoxResource {
|
||||||
|
|||||||
@@ -189,12 +189,14 @@ class VortexTile private (
|
|||||||
|
|
||||||
val smemSourceWidth = 4 // FIXME: hardcoded
|
val smemSourceWidth = 4 // FIXME: hardcoded
|
||||||
|
|
||||||
// TODO: parametrize
|
val numWarps = 4 // TODO: parametrize
|
||||||
val numWarps = 4
|
|
||||||
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
|
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
|
||||||
val UUID_WIDTH = 44
|
val UUID_WIDTH = 44
|
||||||
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||||
val LSUQ_TAG_BITS = 4
|
val numLsuLanes = 4
|
||||||
|
// see VX_gpu_pkg.sv
|
||||||
|
val LSUQ_SIZE = 8 * (numLanes / numLsuLanes)
|
||||||
|
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
||||||
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||||
// dmem and smem share the same tag width, DCACHE_NOSM_TAG_WIDTH
|
// dmem and smem share the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||||
val smemTagWidth = dmemTagWidth
|
val smemTagWidth = dmemTagWidth
|
||||||
@@ -218,7 +220,7 @@ class VortexTile private (
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val dmemNodes = Seq.tabulate(numLanes) { i =>
|
val dmemNodes = Seq.tabulate(numLsuLanes) { i =>
|
||||||
TLClientNode(
|
TLClientNode(
|
||||||
Seq(
|
Seq(
|
||||||
TLMasterPortParameters.v1(
|
TLMasterPortParameters.v1(
|
||||||
@@ -241,7 +243,7 @@ class VortexTile private (
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val smemNodes = Seq.tabulate(numLanes) { i =>
|
val smemNodes = Seq.tabulate(numLsuLanes) { i =>
|
||||||
TLClientNode(
|
TLClientNode(
|
||||||
Seq(
|
Seq(
|
||||||
TLMasterPortParameters.v1(
|
TLMasterPortParameters.v1(
|
||||||
@@ -337,12 +339,12 @@ class VortexTile private (
|
|||||||
//
|
//
|
||||||
// Instantiate the same number of banks as there are lanes.
|
// Instantiate the same number of banks as there are LSU lanes.
|
||||||
// TODO: parametrize
|
// TODO: parametrize
|
||||||
val smemBanks = Seq.tabulate(numLanes) { bankId =>
|
val smemBanks = Seq.tabulate(numLsuLanes) { bankId =>
|
||||||
// Banked-by-word (4 bytes)
|
// Banked-by-word (4 bytes)
|
||||||
// base for bank 1: ff...000000|01|00
|
// base for bank 1: ff...000000|01|00
|
||||||
// mask for bank 1: 00...111111|00|11
|
// mask for bank 1: 00...111111|00|11
|
||||||
val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||||
val mask = 0x00ffffffL ^ ((numLanes - 1) * 4 /*wordSize*/ )
|
val mask = 0x00ffffffL ^ ((numLsuLanes - 1) * 4 /*wordSize*/ )
|
||||||
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
||||||
}
|
}
|
||||||
// smem lanes-to-banks crossbar
|
// smem lanes-to-banks crossbar
|
||||||
@@ -531,7 +533,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||||
// up some area
|
// up some area
|
||||||
val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
|
val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
|
||||||
val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
val dmemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
|
||||||
Module(
|
Module(
|
||||||
new VortexTLAdapter(
|
new VortexTLAdapter(
|
||||||
outer.dmemSourceWidth,
|
outer.dmemSourceWidth,
|
||||||
@@ -565,7 +567,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
new RRArbiter(
|
new RRArbiter(
|
||||||
// FIXME: should really be source on D channel
|
// FIXME: should really be source on D channel
|
||||||
new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType,
|
new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType,
|
||||||
outer.numLanes
|
outer.numLsuLanes
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
arb.io.out.ready := true.B
|
arb.io.out.ready := true.B
|
||||||
@@ -574,7 +576,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
arbIn.valid := vxDmem.valid
|
arbIn.valid := vxDmem.valid
|
||||||
arbIn.bits := vxDmem.bits.source
|
arbIn.bits := vxDmem.bits.source
|
||||||
}
|
}
|
||||||
val matchingSources = Wire(UInt(outer.numLanes.W))
|
val matchingSources = Wire(UInt(outer.numLsuLanes.W))
|
||||||
matchingSources := dmemBundles
|
matchingSources := dmemBundles
|
||||||
.map(b =>
|
.map(b =>
|
||||||
// If there is no valid response pending across all lanes,
|
// If there is no valid response pending across all lanes,
|
||||||
@@ -609,7 +611,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt
|
core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt
|
||||||
|
|
||||||
// override response channel with matchingSources
|
// override response channel with matchingSources
|
||||||
val dmem_d_valid_vec = Wire(Vec(outer.numLanes, Bool()))
|
val dmem_d_valid_vec = Wire(Vec(outer.numLsuLanes, Bool()))
|
||||||
dmemTLAdapters.zipWithIndex.foreach {
|
dmemTLAdapters.zipWithIndex.foreach {
|
||||||
case (tlAdapter, i) =>
|
case (tlAdapter, i) =>
|
||||||
dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i)
|
dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i)
|
||||||
@@ -632,7 +634,7 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||||
// up some area
|
// up some area
|
||||||
val smemTLBundles = outer.smemNodes.map(_.out.head._1)
|
val smemTLBundles = outer.smemNodes.map(_.out.head._1)
|
||||||
val smemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
val smemTLAdapters = Seq.tabulate(outer.numLsuLanes) { _ =>
|
||||||
Module(
|
Module(
|
||||||
new VortexTLAdapter(
|
new VortexTLAdapter(
|
||||||
outer.smemSourceWidth,
|
outer.smemSourceWidth,
|
||||||
|
|||||||
Reference in New Issue
Block a user