From 60cd72a9d6ced562372e6221aa2dec1d86a0d7f7 Mon Sep 17 00:00:00 2001 From: Hansung Kim Date: Thu, 4 Jan 2024 00:17:00 -0800 Subject: [PATCH] Flatten dmem bundle of Vortex core IO into 1-D arrays --- src/main/scala/rocket/VortexCore.scala | 44 ++++++++++++++-------- src/main/scala/tile/VortexTile.scala | 52 ++++++++++++++++++++------ 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/src/main/scala/rocket/VortexCore.scala b/src/main/scala/rocket/VortexCore.scala index 6e93633..5f4caa5 100644 --- a/src/main/scala/rocket/VortexCore.scala +++ b/src/main/scala/rocket/VortexCore.scala @@ -40,28 +40,18 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle val reset_vector = Input(UInt(resetVectorLen.W)) val interrupts = Input(new CoreInterrupts()) - // TODO: parametrize - val numWarps = 4 - val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) - val UUID_WIDTH = 44 - val imemTagWidth = UUID_WIDTH + NW_WIDTH - val LSUQ_TAG_BITS = 4 - val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS - // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH - val smemTagWidth = dmemTagWidth - // conditionally instantiate ports depending on whether we want to use VX_cache or not val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = imemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = imemTagWidth, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32))) })) else None val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = dmemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) + // val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 
32)) + // val d = Flipped(Decoupled(new VortexBundleD(tagWidth = dmemTagWidth, dataWidth = 32))) })) else None val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle { - val a = Decoupled(new VortexBundleA(tagWidth = smemTagWidth, dataWidth = 32)) - val d = Flipped(Decoupled(new VortexBundleD(tagWidth = smemTagWidth, dataWidth = 32))) + val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32)) + val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32))) })) else None val mem = if (tile.vortexParams.useVxCache) Some(new Bundle { val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128)) @@ -70,6 +60,26 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle // val d = Flipped(tile.memNode.out.head._1.d.cloneType) }) else None + // Chisel doesn't support 2-D array in BlackBox interface to Verilog, so + // everything needs to be 1-D flattened UInt with their widths configurable by numLanes. 
+ // + // FIXME: hardcoded bitwidths + val dmem_a_ready = Input(UInt((tile.numLanes * 1).W)) + val dmem_a_valid = Output(UInt((tile.numLanes * 1).W)) + val dmem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W)) + val dmem_a_bits_size = Output(UInt((tile.numLanes * 4).W)) + val dmem_a_bits_source = Output(UInt((tile.numLanes * tile.dmemTagWidth).W)) + val dmem_a_bits_address = Output(UInt((tile.numLanes * 32).W)) + val dmem_a_bits_mask = Output(UInt((tile.numLanes * 4).W)) + val dmem_a_bits_data = Output(UInt((tile.numLanes * 32).W)) + + val dmem_d_valid = Input(UInt((tile.numLanes * 1).W)) + val dmem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W)) + val dmem_d_bits_size = Input(UInt((tile.numLanes * 4).W)) + val dmem_d_bits_source = Input(UInt((tile.numLanes * tile.dmemTagWidth).W)) + val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W)) + val dmem_d_ready = Output(UInt((tile.numLanes * 1).W)) + // val fpu = Flipped(new FPUCoreIO()) //val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs)) //val trace = Output(new TraceBundle) @@ -88,6 +98,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) "CORE_ID" -> tile.tileParams.hartId, // TODO: can we get this as a parameter? 
"BOOTROM_HANG100" -> 0x10100, + "NUM_THREADS" -> tile.numLanes ) ) with HasBlackBoxResource { @@ -109,6 +120,7 @@ class Vortex(tile: VortexTile)(implicit p: Parameters) // addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v") // addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v") + addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv") // addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv") diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala index 5dd2aa0..482ba7e 100644 --- a/src/main/scala/tile/VortexTile.scala +++ b/src/main/scala/tile/VortexTile.scala @@ -189,6 +189,16 @@ class VortexTile private ( val smemSourceWidth = 4 // FIXME: hardcoded + // TODO: parametrize + val numWarps = 4 + val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) + val UUID_WIDTH = 44 + val imemTagWidth = UUID_WIDTH + NW_WIDTH + val LSUQ_TAG_BITS = 4 + val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS + // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH + val smemTagWidth = dmemTagWidth + val imemNodes = Seq.tabulate(1) { i => TLClientNode( Seq( @@ -525,8 +535,8 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { Module( new VortexTLAdapter( outer.dmemSourceWidth, - chiselTypeOf(core.io.dmem.get(0).a.bits), - chiselTypeOf(core.io.dmem.get(0).d.bits), + new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32), + new VortexBundleD(tagWidth = outer.dmemTagWidth, dataWidth = 32), outer.dmemNodes(0).out.head ) ) @@ -553,7 +563,8 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // of a same source id for all lanes. 
val arb = Module( new RRArbiter( - core.io.dmem.get.head.d.bits.source.cloneType, + // FIXME: should really be the `source` field of the D-channel bundle (VortexBundleD), not the A-channel one + new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType, outer.numLanes ) ) @@ -575,17 +586,36 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) { // make connection: // VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes - (core.io.dmem.get zip dmemTLAdapters) foreach { - case (coreMem, tlAdapter) => - tlAdapter.io.inReq <> coreMem.a - coreMem.d <> tlAdapter.io.inResp + // + // Chisel doesn't support 2-D arrays in a BlackBox interface to Verilog, so + // everything needs to be flattened. + dmemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + // tlAdapter.io.inReq <> coreMem.a + tlAdapter.io.inReq.valid := core.io.dmem_a_valid(i) + tlAdapter.io.inReq.bits.opcode := core.io.dmem_a_bits_opcode(3 * (i + 1) - 1, 3 * i) + tlAdapter.io.inReq.bits.size := core.io.dmem_a_bits_size(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.source := core.io.dmem_a_bits_source(outer.dmemTagWidth * (i + 1) - 1, outer.dmemTagWidth * i) + tlAdapter.io.inReq.bits.address := core.io.dmem_a_bits_address(32 * (i + 1) - 1, 32 * i) + tlAdapter.io.inReq.bits.mask := core.io.dmem_a_bits_mask(4 * (i + 1) - 1, 4 * i) + tlAdapter.io.inReq.bits.data := core.io.dmem_a_bits_data(32 * (i + 1) - 1, 32 * i) } + core.io.dmem_a_ready := dmemTLAdapters.map(_.io.inReq.ready).asUInt + + core.io.dmem_d_valid := dmemTLAdapters.map(_.io.inResp.valid).asUInt + core.io.dmem_d_bits_opcode := dmemTLAdapters.map(_.io.inResp.bits.opcode).asUInt + core.io.dmem_d_bits_size := dmemTLAdapters.map(_.io.inResp.bits.size).asUInt + core.io.dmem_d_bits_source := dmemTLAdapters.map(_.io.inResp.bits.source).asUInt + core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt + // override response channel with matchingSources - (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach { - case 
((coreMem, tlAdapter), i) => - coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i) - tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i) + val dmem_d_valid_vec = Wire(Vec(outer.numLanes, Bool())) + dmemTLAdapters.zipWithIndex.foreach { + case (tlAdapter, i) => + dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i) + tlAdapter.io.inResp.ready := core.io.dmem_d_ready(i) && matchingSources(i) } + core.io.dmem_d_valid := dmem_d_valid_vec.asUInt (dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) => tlOut.a <> tlAdapter.io.outReq tlAdapter.io.outResp <> tlOut.d