Store multiple oldSrcId reqs per lane in a table row

The number of per-lane reqs stored in each row is controlled by `numPerLaneReqs`,
rather than being fixed at 2 ** sourceWidth, to allow some flexibility.
This commit is contained in:
Hansung Kim
2023-03-29 14:02:41 -07:00
parent 3b335bda18
commit 12b3b67687
2 changed files with 120 additions and 80 deletions

View File

@@ -62,22 +62,27 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
new ShiftQueue(reqQueueEntryT, 4 /* FIXME hardcoded */ ) new ShiftQueue(reqQueueEntryT, 4 /* FIXME hardcoded */ )
) )
} }
// The maximum number of requests from a single lane that can go into a
// coalesced request. Upper bound is 2**sourceWidth.
val numPerLaneReqs = 2
val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8) val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8)
val respQueues = Seq.tabulate(numLanes) { _ => val respQueues = Seq.tabulate(numLanes) { _ =>
// Module(
// new ShiftQueue(respQueueEntryT, 8 /* FIXME depth hardcoded */ )
// )
Module( Module(
new MultiPortQueue( new MultiPortQueue(
respQueueEntryT, respQueueEntryT,
// enq_lanes = 1 + M, where 1 is the response for the original per-lane // enq_lanes = 1 + M, where 1 is the response for the original per-lane
// requests that didn't get coalesced, and M is the number of coalescer // requests that didn't get coalesced, and M is the maximum number of
// nodes. // single-lane requests that can go into a coalesced request.
2, // (`numPerLaneReqs`).
1 + numPerLaneReqs,
// deq_lanes = 1 because we're serializing all responses to 1 port that // deq_lanes = 1 because we're serializing all responses to 1 port that
// goes back to the core. // goes back to the core.
1, 1,
2, // lanes. Has to be at least max(enq_lanes, deq_lanes)
1 + numPerLaneReqs,
// Depth of each lane queue.
// XXX queue depth is set to an arbitrarily high value that doesn't // XXX queue depth is set to an arbitrarily high value that doesn't
// make queue block up in the middle of the simulation. Ideally there // make queue block up in the middle of the simulation. Ideally there
// should be a more logical way to set this, or we should handle // should be a more logical way to set this, or we should handle
@@ -210,25 +215,32 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
// detail outside to the coalescer // detail outside to the coalescer
val offsetBits = 4 // FIXME hardcoded val offsetBits = 4 // FIXME hardcoded
val sizeBits = 2 // FIXME hardcoded val sizeBits = 2 // FIXME hardcoded
val newEntry = Wire(new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits)) val newEntry = Wire(
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
)
newEntry.source := coalSourceId newEntry.source := coalSourceId
newEntry.lanes.foreach { l => newEntry.lanes.foreach { l =>
l.valid := false.B
l.reqs.foreach { r => l.reqs.foreach { r =>
// TODO: this part needs the actual coalescing logic to work // TODO: this part needs the actual coalescing logic to work
r.valid := true.B r.valid := false.B
r.offset := 1.U r.offset := 1.U
r.size := 2.U r.size := 2.U
} }
} }
newEntry.lanes(0).valid := true.B newEntry.lanes(0).reqs(0).valid := true.B
newEntry.lanes(2).valid := true.B newEntry.lanes(2).reqs(0).valid := true.B
dontTouch(newEntry) dontTouch(newEntry)
// Uncoalescer module uncoalesces responses back to each lane // Uncoalescer module uncoalesces responses back to each lane
val coalDataWidth = tlCoal.params.dataBits val coalDataWidth = tlCoal.params.dataBits
val uncoalescer = Module( val uncoalescer = Module(
new UncoalescingUnit(numLanes, sourceWidth, coalDataWidth, outer.numInflightCoalRequests) new UncoalescingUnit(
numLanes,
numPerLaneReqs,
sourceWidth,
coalDataWidth,
outer.numInflightCoalRequests
)
) )
uncoalescer.io.coalReqValid := coalReqValid uncoalescer.io.coalReqValid := coalReqValid
@@ -238,14 +250,16 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
uncoalescer.io.coalRespData := tlCoal.d.bits.data uncoalescer.io.coalRespData := tlCoal.d.bits.data
// Queue up uncoalesced responses into each lane's response queue // Queue up uncoalesced responses into each lane's response queue
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, resp) => (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
assert( lanes.zipWithIndex.foreach { case (resp, i) =>
q.io.enq(respQueueCoalPortOffset).ready, assert(
s"respQueue: enq port for 0-th coalesced response is blocked" q.io.enq(respQueueCoalPortOffset + i).ready,
) s"respQueue: enq port for 0-th coalesced response is blocked"
q.io.enq(respQueueCoalPortOffset).valid := resp.valid )
q.io.enq(respQueueCoalPortOffset).bits := resp.bits q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
// dontTouch(q.io.enq(respQueueCoalPortOffset)) q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
// dontTouch(q.io.enq(respQueueCoalPortOffset))
}
} }
// Debug // Debug
@@ -260,12 +274,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
class UncoalescingUnit( class UncoalescingUnit(
val numLanes: Int, val numLanes: Int,
val numPerLaneReqs: Int,
val sourceWidth: Int, val sourceWidth: Int,
val coalDataWidth: Int, val coalDataWidth: Int,
val numInflightCoalRequests: Int val numInflightCoalRequests: Int
) extends Module { ) extends Module {
val inflightTable = Module( val inflightTable = Module(
new InflightCoalReqTable(numLanes, sourceWidth, numInflightCoalRequests) new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
) )
val wordSize = 4 // FIXME duplicate val wordSize = 4 // FIXME duplicate
@@ -275,7 +290,9 @@ class UncoalescingUnit(
val coalRespValid = Input(Bool()) val coalRespValid = Input(Bool())
val coalRespSrcId = Input(UInt(sourceWidth.W)) val coalRespSrcId = Input(UInt(sourceWidth.W))
val coalRespData = Input(UInt(coalDataWidth.W)) val coalRespData = Input(UInt(coalDataWidth.W))
val uncoalResps = Output(Vec(numLanes, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8)))) val uncoalResps = Output(
Vec(numLanes, Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8))))
)
}) })
// Populate inflight table // Populate inflight table
@@ -311,25 +328,26 @@ class UncoalescingUnit(
// Un-coalesce responses back to individual lanes // Un-coalesce responses back to individual lanes
val found = inflightTable.io.lookup.bits val found = inflightTable.io.lookup.bits
found.lanes.zipWithIndex.foreach { case (l, i) => (found.lanes zip io.uncoalResps).foreach { case (lane, ioLane) =>
// FIXME: only looking at 0th srcId entry lane.reqs.zipWithIndex.foreach { case (req, i) =>
val ioReq = ioLane(i)
val uncoalResp = io.uncoalResps(i) // FIXME: only looking at 0th srcId entry
uncoalResp.valid := false.B
uncoalResp.bits := DontCare
when(inflightTable.io.lookup.valid) { ioReq.valid := false.B
uncoalResp.valid := l.valid ioReq.bits := DontCare
uncoalResp.bits.source := 0.U
// FIXME: disregard size enum for now when(inflightTable.io.lookup.valid) {
val byteSize = 4 ioReq.valid := req.valid
uncoalResp.bits.data := ioReq.bits.source := 0.U
getCoalescedDataChunk(io.coalRespData, coalDataWidth, l.reqs(0).offset, byteSize)
}
when(l.valid) { // FIXME: disregard size enum for now
when(l.reqs(0).valid) { val byteSize = 4
ioReq.bits.data :=
getCoalescedDataChunk(io.coalRespData, coalDataWidth, req.offset, byteSize)
}
when(req.valid) {
printf(s"lane ${i} req 0 is valid!\n") printf(s"lane ${i} req 0 is valid!\n")
} }
} }
@@ -341,12 +359,16 @@ class UncoalescingUnit(
// from, what their original TileLink sourceId were, etc. We use this info to // from, what their original TileLink sourceId were, etc. We use this info to
// split the coalesced response back to individual per-lane responses with the // split the coalesced response back to individual per-lane responses with the
// right metadata. // right metadata.
class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries: Int) class InflightCoalReqTable(
extends Module { val numLanes: Int,
val numPerLaneReqs: Int,
val sourceWidth: Int,
val entries: Int
) extends Module {
val offsetBits = 4 // FIXME hardcoded val offsetBits = 4 // FIXME hardcoded
val sizeBits = 2 // FIXME hardcoded val sizeBits = 2 // FIXME hardcoded
val entryT = val entryT =
new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits) new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
val io = IO(new Bundle { val io = IO(new Bundle {
val enq = Flipped(Decoupled(entryT)) val enq = Flipped(Decoupled(entryT))
@@ -362,6 +384,7 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
val valid = Bool() val valid = Bool()
val bits = new InflightCoalReqTableEntry( val bits = new InflightCoalReqTableEntry(
numLanes, numLanes,
numPerLaneReqs,
sourceWidth, sourceWidth,
offsetBits, offsetBits,
sizeBits sizeBits
@@ -373,7 +396,6 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
(0 until entries).foreach { i => (0 until entries).foreach { i =>
table(i).valid := false.B table(i).valid := false.B
table(i).bits.lanes.foreach { l => table(i).bits.lanes.foreach { l =>
l.valid := false.B
l.reqs.foreach { r => l.reqs.foreach { r =>
r.offset := 0.U r.offset := 0.U
r.size := 0.U r.size := 0.U
@@ -422,6 +444,8 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
class InflightCoalReqTableEntry( class InflightCoalReqTableEntry(
val numLanes: Int, val numLanes: Int,
// Maximum number of requests from a single lane that can get coalesced into a single request
val numPerLaneReqs: Int,
val sourceWidth: Int, val sourceWidth: Int,
val offsetBits: Int, val offsetBits: Int,
val sizeBits: Int val sizeBits: Int
@@ -432,9 +456,8 @@ class InflightCoalReqTableEntry(
val size = UInt(sizeBits.W) val size = UInt(sizeBits.W)
} }
class PerLane extends Bundle { class PerLane extends Bundle {
val valid = Bool() // FIXME: if numPerLaneReqs != 2 ** sourceWidth, we need to store srcId as well
// srcId is positionally encoded val reqs = Vec(numPerLaneReqs, new CoreReq)
val reqs = Vec(1 << sourceWidth, new CoreReq)
} }
// sourceId of the coalesced response that just came back. This will be the // sourceId of the coalesced response that just came back. This will be the
// key that queries the table. // key that queries the table.

View File

@@ -32,58 +32,75 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester { class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "uncoalescer" behavior of "uncoalescer"
val numLanes = 4 val numLanes = 4
val numPerLaneReqs = 2
val sourceWidth = 2 val sourceWidth = 2
// 16B coalescing size // 16B coalescing size
val coalDataWidth = 128 val coalDataWidth = 128
val numInflightCoalRequests = 4 val numInflightCoalRequests = 4
it should "work" in { it should "work" in {
test(new UncoalescingUnit(numLanes, sourceWidth, coalDataWidth, numInflightCoalRequests)) test(
// vcs helps with simulation time, but sometimes errors with new UncoalescingUnit(
// "mutation occurred during iteration" java error numLanes,
// .withAnnotations(Seq(VcsBackendAnnotation)) numPerLaneReqs,
{ c => sourceWidth,
val sourceId = 0.U coalDataWidth,
c.io.coalReqValid.poke(true.B) numInflightCoalRequests
c.io.newEntry.source.poke(sourceId) )
c.io.newEntry.lanes.foreach { l => l.valid.poke(false.B) } )
c.io.newEntry.lanes(0).valid.poke(true.B) // vcs helps with simulation time, but sometimes errors with
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B) // "mutation occurred during iteration" java error
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U) // .withAnnotations(Seq(VcsBackendAnnotation))
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U) { c =>
c.io.newEntry.lanes(2).valid.poke(true.B) val sourceId = 0.U
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B) c.io.coalReqValid.poke(true.B)
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U) c.io.newEntry.source.poke(sourceId)
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U) c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U)
c.io.newEntry.lanes(0).reqs(1).size.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
c.io.newEntry.lanes(2).reqs(1).offset.poke(0.U)
c.io.newEntry.lanes(2).reqs(1).size.poke(2.U)
c.clock.step() c.clock.step()
c.io.coalReqValid.poke(false.B) c.io.coalReqValid.poke(false.B)
c.clock.step() c.clock.step()
c.io.coalRespValid.poke(true.B) c.io.coalRespValid.poke(true.B)
c.io.coalRespSrcId.poke(sourceId) c.io.coalRespSrcId.poke(sourceId)
val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL) val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL)
c.io.coalRespData.poke(lit.U) c.io.coalRespData.poke(lit.U)
// table lookup is combinational at the same cycle // table lookup is combinational at the same cycle
c.io.uncoalResps(0).valid.expect(true.B) c.io.uncoalResps(0)(0).valid.expect(true.B)
c.io.uncoalResps(1).valid.expect(false.B) c.io.uncoalResps(1)(0).valid.expect(false.B)
c.io.uncoalResps(2).valid.expect(true.B) c.io.uncoalResps(2)(0).valid.expect(true.B)
c.io.uncoalResps(3).valid.expect(false.B) c.io.uncoalResps(3)(0).valid.expect(false.B)
c.io.uncoalResps(0).bits.data.expect(0x89abcdefL.U) c.io.uncoalResps(0)(0).bits.data.expect(0x89abcdefL.U)
c.io.uncoalResps(0).bits.source.expect(0.U) c.io.uncoalResps(0)(0).bits.source.expect(0.U)
c.io.uncoalResps(2).bits.data.expect(0x5ca1ab1eL.U) c.io.uncoalResps(0)(1).bits.data.expect(0x89abcdefL.U)
c.io.uncoalResps(2).bits.source.expect(0.U) c.io.uncoalResps(0)(1).bits.source.expect(0.U)
} c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
c.io.uncoalResps(2)(0).bits.source.expect(0.U)
c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U)
c.io.uncoalResps(2)(1).bits.source.expect(0.U)
}
} }
} }
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester { class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
behavior of "inflight coalesced request table" behavior of "inflight coalesced request table"
val numLanes = 4 val numLanes = 4
val numPerLaneReqs = 2
val sourceWidth = 2 val sourceWidth = 2
val entries = 4 val entries = 4
@@ -91,7 +108,7 @@ class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
val sizeBits = 2 val sizeBits = 2
val inflightCoalReqTableEntry = val inflightCoalReqTableEntry =
new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits) new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
// it should "stop enqueueing when full" in { // it should "stop enqueueing when full" in {
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c => // test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c =>