Store multiple oldSrcId reqs per lane in a table row
The number of per-lane reqs is controlled by `numPerLaneReqs` rather than being fixed at 2 ** sourceWidth, to allow some flexibility.
This commit is contained in:
@@ -62,22 +62,27 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
new ShiftQueue(reqQueueEntryT, 4 /* FIXME hardcoded */ )
|
new ShiftQueue(reqQueueEntryT, 4 /* FIXME hardcoded */ )
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The maximum number of requests from a single lane that can go into a
|
||||||
|
// coalesced request. Upper bound is 2**sourceWidth.
|
||||||
|
val numPerLaneReqs = 2
|
||||||
|
|
||||||
val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8)
|
val respQueueEntryT = new RespQueueEntry(sourceWidth, wordSize * 8)
|
||||||
val respQueues = Seq.tabulate(numLanes) { _ =>
|
val respQueues = Seq.tabulate(numLanes) { _ =>
|
||||||
// Module(
|
|
||||||
// new ShiftQueue(respQueueEntryT, 8 /* FIXME depth hardcoded */ )
|
|
||||||
// )
|
|
||||||
Module(
|
Module(
|
||||||
new MultiPortQueue(
|
new MultiPortQueue(
|
||||||
respQueueEntryT,
|
respQueueEntryT,
|
||||||
// enq_lanes = 1 + M, where 1 is the response for the original per-lane
|
// enq_lanes = 1 + M, where 1 is the response for the original per-lane
|
||||||
// requests that didn't get coalesced, and M is the number of coalescer
|
// requests that didn't get coalesced, and M is the maximum number of
|
||||||
// nodes.
|
// single-lane requests that can go into a coalesced request.
|
||||||
2,
|
// (`numPerLaneReqs`).
|
||||||
|
1 + numPerLaneReqs,
|
||||||
// deq_lanes = 1 because we're serializing all responses to 1 port that
|
// deq_lanes = 1 because we're serializing all responses to 1 port that
|
||||||
// goes back to the core.
|
// goes back to the core.
|
||||||
1,
|
1,
|
||||||
2,
|
// lanes. Has to be at least max(enq_lanes, deq_lanes)
|
||||||
|
1 + numPerLaneReqs,
|
||||||
|
// Depth of each lane queue.
|
||||||
// XXX queue depth is set to an arbitrarily high value that doesn't
|
// XXX queue depth is set to an arbitrarily high value that doesn't
|
||||||
// make queue block up in the middle of the simulation. Ideally there
|
// make queue block up in the middle of the simulation. Ideally there
|
||||||
// should be a more logical way to set this, or we should handle
|
// should be a more logical way to set this, or we should handle
|
||||||
@@ -210,25 +215,32 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
// detail outside to the coalescer
|
// detail outside to the coalescer
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = 4 // FIXME hardcoded
|
||||||
val sizeBits = 2 // FIXME hardcoded
|
val sizeBits = 2 // FIXME hardcoded
|
||||||
val newEntry = Wire(new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits))
|
val newEntry = Wire(
|
||||||
|
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
||||||
|
)
|
||||||
newEntry.source := coalSourceId
|
newEntry.source := coalSourceId
|
||||||
newEntry.lanes.foreach { l =>
|
newEntry.lanes.foreach { l =>
|
||||||
l.valid := false.B
|
|
||||||
l.reqs.foreach { r =>
|
l.reqs.foreach { r =>
|
||||||
// TODO: this part needs the actual coalescing logic to work
|
// TODO: this part needs the actual coalescing logic to work
|
||||||
r.valid := true.B
|
r.valid := false.B
|
||||||
r.offset := 1.U
|
r.offset := 1.U
|
||||||
r.size := 2.U
|
r.size := 2.U
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
newEntry.lanes(0).valid := true.B
|
newEntry.lanes(0).reqs(0).valid := true.B
|
||||||
newEntry.lanes(2).valid := true.B
|
newEntry.lanes(2).reqs(0).valid := true.B
|
||||||
dontTouch(newEntry)
|
dontTouch(newEntry)
|
||||||
|
|
||||||
// Uncoalescer module uncoalesces responses back to each lane
|
// Uncoalescer module uncoalesces responses back to each lane
|
||||||
val coalDataWidth = tlCoal.params.dataBits
|
val coalDataWidth = tlCoal.params.dataBits
|
||||||
val uncoalescer = Module(
|
val uncoalescer = Module(
|
||||||
new UncoalescingUnit(numLanes, sourceWidth, coalDataWidth, outer.numInflightCoalRequests)
|
new UncoalescingUnit(
|
||||||
|
numLanes,
|
||||||
|
numPerLaneReqs,
|
||||||
|
sourceWidth,
|
||||||
|
coalDataWidth,
|
||||||
|
outer.numInflightCoalRequests
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
uncoalescer.io.coalReqValid := coalReqValid
|
uncoalescer.io.coalReqValid := coalReqValid
|
||||||
@@ -238,14 +250,16 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
uncoalescer.io.coalRespData := tlCoal.d.bits.data
|
uncoalescer.io.coalRespData := tlCoal.d.bits.data
|
||||||
|
|
||||||
// Queue up uncoalesced responses into each lane's response queue
|
// Queue up uncoalesced responses into each lane's response queue
|
||||||
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, resp) =>
|
(respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
|
||||||
assert(
|
lanes.zipWithIndex.foreach { case (resp, i) =>
|
||||||
q.io.enq(respQueueCoalPortOffset).ready,
|
assert(
|
||||||
s"respQueue: enq port for 0-th coalesced response is blocked"
|
q.io.enq(respQueueCoalPortOffset + i).ready,
|
||||||
)
|
s"respQueue: enq port for 0-th coalesced response is blocked"
|
||||||
q.io.enq(respQueueCoalPortOffset).valid := resp.valid
|
)
|
||||||
q.io.enq(respQueueCoalPortOffset).bits := resp.bits
|
q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
|
||||||
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
|
||||||
|
// dontTouch(q.io.enq(respQueueCoalPortOffset))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug
|
// Debug
|
||||||
@@ -260,12 +274,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
|
|||||||
|
|
||||||
class UncoalescingUnit(
|
class UncoalescingUnit(
|
||||||
val numLanes: Int,
|
val numLanes: Int,
|
||||||
|
val numPerLaneReqs: Int,
|
||||||
val sourceWidth: Int,
|
val sourceWidth: Int,
|
||||||
val coalDataWidth: Int,
|
val coalDataWidth: Int,
|
||||||
val numInflightCoalRequests: Int
|
val numInflightCoalRequests: Int
|
||||||
) extends Module {
|
) extends Module {
|
||||||
val inflightTable = Module(
|
val inflightTable = Module(
|
||||||
new InflightCoalReqTable(numLanes, sourceWidth, numInflightCoalRequests)
|
new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
|
||||||
)
|
)
|
||||||
val wordSize = 4 // FIXME duplicate
|
val wordSize = 4 // FIXME duplicate
|
||||||
|
|
||||||
@@ -275,7 +290,9 @@ class UncoalescingUnit(
|
|||||||
val coalRespValid = Input(Bool())
|
val coalRespValid = Input(Bool())
|
||||||
val coalRespSrcId = Input(UInt(sourceWidth.W))
|
val coalRespSrcId = Input(UInt(sourceWidth.W))
|
||||||
val coalRespData = Input(UInt(coalDataWidth.W))
|
val coalRespData = Input(UInt(coalDataWidth.W))
|
||||||
val uncoalResps = Output(Vec(numLanes, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8))))
|
val uncoalResps = Output(
|
||||||
|
Vec(numLanes, Vec(numPerLaneReqs, ValidIO(new RespQueueEntry(sourceWidth, wordSize * 8))))
|
||||||
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Populate inflight table
|
// Populate inflight table
|
||||||
@@ -311,25 +328,26 @@ class UncoalescingUnit(
|
|||||||
|
|
||||||
// Un-coalesce responses back to individual lanes
|
// Un-coalesce responses back to individual lanes
|
||||||
val found = inflightTable.io.lookup.bits
|
val found = inflightTable.io.lookup.bits
|
||||||
found.lanes.zipWithIndex.foreach { case (l, i) =>
|
(found.lanes zip io.uncoalResps).foreach { case (lane, ioLane) =>
|
||||||
// FIXME: only looking at 0th srcId entry
|
lane.reqs.zipWithIndex.foreach { case (req, i) =>
|
||||||
|
val ioReq = ioLane(i)
|
||||||
|
|
||||||
val uncoalResp = io.uncoalResps(i)
|
// FIXME: only looking at 0th srcId entry
|
||||||
uncoalResp.valid := false.B
|
|
||||||
uncoalResp.bits := DontCare
|
|
||||||
|
|
||||||
when(inflightTable.io.lookup.valid) {
|
ioReq.valid := false.B
|
||||||
uncoalResp.valid := l.valid
|
ioReq.bits := DontCare
|
||||||
uncoalResp.bits.source := 0.U
|
|
||||||
|
|
||||||
// FIXME: disregard size enum for now
|
when(inflightTable.io.lookup.valid) {
|
||||||
val byteSize = 4
|
ioReq.valid := req.valid
|
||||||
uncoalResp.bits.data :=
|
ioReq.bits.source := 0.U
|
||||||
getCoalescedDataChunk(io.coalRespData, coalDataWidth, l.reqs(0).offset, byteSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
when(l.valid) {
|
// FIXME: disregard size enum for now
|
||||||
when(l.reqs(0).valid) {
|
val byteSize = 4
|
||||||
|
ioReq.bits.data :=
|
||||||
|
getCoalescedDataChunk(io.coalRespData, coalDataWidth, req.offset, byteSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
when(req.valid) {
|
||||||
printf(s"lane ${i} req 0 is valid!\n")
|
printf(s"lane ${i} req 0 is valid!\n")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -341,12 +359,16 @@ class UncoalescingUnit(
|
|||||||
// from, what their original TileLink sourceId were, etc. We use this info to
|
// from, what their original TileLink sourceId were, etc. We use this info to
|
||||||
// split the coalesced response back to individual per-lane responses with the
|
// split the coalesced response back to individual per-lane responses with the
|
||||||
// right metadata.
|
// right metadata.
|
||||||
class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries: Int)
|
class InflightCoalReqTable(
|
||||||
extends Module {
|
val numLanes: Int,
|
||||||
|
val numPerLaneReqs: Int,
|
||||||
|
val sourceWidth: Int,
|
||||||
|
val entries: Int
|
||||||
|
) extends Module {
|
||||||
val offsetBits = 4 // FIXME hardcoded
|
val offsetBits = 4 // FIXME hardcoded
|
||||||
val sizeBits = 2 // FIXME hardcoded
|
val sizeBits = 2 // FIXME hardcoded
|
||||||
val entryT =
|
val entryT =
|
||||||
new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits)
|
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val enq = Flipped(Decoupled(entryT))
|
val enq = Flipped(Decoupled(entryT))
|
||||||
@@ -362,6 +384,7 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
|
|||||||
val valid = Bool()
|
val valid = Bool()
|
||||||
val bits = new InflightCoalReqTableEntry(
|
val bits = new InflightCoalReqTableEntry(
|
||||||
numLanes,
|
numLanes,
|
||||||
|
numPerLaneReqs,
|
||||||
sourceWidth,
|
sourceWidth,
|
||||||
offsetBits,
|
offsetBits,
|
||||||
sizeBits
|
sizeBits
|
||||||
@@ -373,7 +396,6 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
|
|||||||
(0 until entries).foreach { i =>
|
(0 until entries).foreach { i =>
|
||||||
table(i).valid := false.B
|
table(i).valid := false.B
|
||||||
table(i).bits.lanes.foreach { l =>
|
table(i).bits.lanes.foreach { l =>
|
||||||
l.valid := false.B
|
|
||||||
l.reqs.foreach { r =>
|
l.reqs.foreach { r =>
|
||||||
r.offset := 0.U
|
r.offset := 0.U
|
||||||
r.size := 0.U
|
r.size := 0.U
|
||||||
@@ -422,6 +444,8 @@ class InflightCoalReqTable(val numLanes: Int, val sourceWidth: Int, val entries:
|
|||||||
|
|
||||||
class InflightCoalReqTableEntry(
|
class InflightCoalReqTableEntry(
|
||||||
val numLanes: Int,
|
val numLanes: Int,
|
||||||
|
// Maximum number of requests from a single lane that can get coalesced into a single request
|
||||||
|
val numPerLaneReqs: Int,
|
||||||
val sourceWidth: Int,
|
val sourceWidth: Int,
|
||||||
val offsetBits: Int,
|
val offsetBits: Int,
|
||||||
val sizeBits: Int
|
val sizeBits: Int
|
||||||
@@ -432,9 +456,8 @@ class InflightCoalReqTableEntry(
|
|||||||
val size = UInt(sizeBits.W)
|
val size = UInt(sizeBits.W)
|
||||||
}
|
}
|
||||||
class PerLane extends Bundle {
|
class PerLane extends Bundle {
|
||||||
val valid = Bool()
|
// FIXME: if numPerLaneReqs != 2 ** sourceWidth, we need to store srcId as well
|
||||||
// srcId is positionally encoded
|
val reqs = Vec(numPerLaneReqs, new CoreReq)
|
||||||
val reqs = Vec(1 << sourceWidth, new CoreReq)
|
|
||||||
}
|
}
|
||||||
// sourceId of the coalesced response that just came back. This will be the
|
// sourceId of the coalesced response that just came back. This will be the
|
||||||
// key that queries the table.
|
// key that queries the table.
|
||||||
|
|||||||
@@ -32,58 +32,75 @@ class MultiPortQueueUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class UncoalescingUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
behavior of "uncoalescer"
|
behavior of "uncoalescer"
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
|
val numPerLaneReqs = 2
|
||||||
val sourceWidth = 2
|
val sourceWidth = 2
|
||||||
// 16B coalescing size
|
// 16B coalescing size
|
||||||
val coalDataWidth = 128
|
val coalDataWidth = 128
|
||||||
val numInflightCoalRequests = 4
|
val numInflightCoalRequests = 4
|
||||||
|
|
||||||
it should "work" in {
|
it should "work" in {
|
||||||
test(new UncoalescingUnit(numLanes, sourceWidth, coalDataWidth, numInflightCoalRequests))
|
test(
|
||||||
// vcs helps with simulation time, but sometimes errors with
|
new UncoalescingUnit(
|
||||||
// "mutation occurred during iteration" java error
|
numLanes,
|
||||||
// .withAnnotations(Seq(VcsBackendAnnotation))
|
numPerLaneReqs,
|
||||||
{ c =>
|
sourceWidth,
|
||||||
val sourceId = 0.U
|
coalDataWidth,
|
||||||
c.io.coalReqValid.poke(true.B)
|
numInflightCoalRequests
|
||||||
c.io.newEntry.source.poke(sourceId)
|
)
|
||||||
c.io.newEntry.lanes.foreach { l => l.valid.poke(false.B) }
|
)
|
||||||
c.io.newEntry.lanes(0).valid.poke(true.B)
|
// vcs helps with simulation time, but sometimes errors with
|
||||||
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
// "mutation occurred during iteration" java error
|
||||||
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
// .withAnnotations(Seq(VcsBackendAnnotation))
|
||||||
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
|
{ c =>
|
||||||
c.io.newEntry.lanes(2).valid.poke(true.B)
|
val sourceId = 0.U
|
||||||
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
c.io.coalReqValid.poke(true.B)
|
||||||
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
|
c.io.newEntry.source.poke(sourceId)
|
||||||
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
|
c.io.newEntry.lanes(0).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(0).reqs(0).size.poke(2.U)
|
||||||
|
c.io.newEntry.lanes(0).reqs(1).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(0).reqs(1).offset.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(0).reqs(1).size.poke(2.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).offset.poke(2.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(0).size.poke(1.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(1).valid.poke(true.B)
|
||||||
|
c.io.newEntry.lanes(2).reqs(1).offset.poke(0.U)
|
||||||
|
c.io.newEntry.lanes(2).reqs(1).size.poke(2.U)
|
||||||
|
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
|
|
||||||
c.io.coalReqValid.poke(false.B)
|
c.io.coalReqValid.poke(false.B)
|
||||||
|
|
||||||
c.clock.step()
|
c.clock.step()
|
||||||
|
|
||||||
c.io.coalRespValid.poke(true.B)
|
c.io.coalRespValid.poke(true.B)
|
||||||
c.io.coalRespSrcId.poke(sourceId)
|
c.io.coalRespSrcId.poke(sourceId)
|
||||||
val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL)
|
val lit = (BigInt(0x0123456789abcdefL) << 64) | BigInt(0x5ca1ab1edeadbeefL)
|
||||||
c.io.coalRespData.poke(lit.U)
|
c.io.coalRespData.poke(lit.U)
|
||||||
|
|
||||||
// table lookup is combinational at the same cycle
|
// table lookup is combinational at the same cycle
|
||||||
c.io.uncoalResps(0).valid.expect(true.B)
|
c.io.uncoalResps(0)(0).valid.expect(true.B)
|
||||||
c.io.uncoalResps(1).valid.expect(false.B)
|
c.io.uncoalResps(1)(0).valid.expect(false.B)
|
||||||
c.io.uncoalResps(2).valid.expect(true.B)
|
c.io.uncoalResps(2)(0).valid.expect(true.B)
|
||||||
c.io.uncoalResps(3).valid.expect(false.B)
|
c.io.uncoalResps(3)(0).valid.expect(false.B)
|
||||||
|
|
||||||
c.io.uncoalResps(0).bits.data.expect(0x89abcdefL.U)
|
c.io.uncoalResps(0)(0).bits.data.expect(0x89abcdefL.U)
|
||||||
c.io.uncoalResps(0).bits.source.expect(0.U)
|
c.io.uncoalResps(0)(0).bits.source.expect(0.U)
|
||||||
c.io.uncoalResps(2).bits.data.expect(0x5ca1ab1eL.U)
|
c.io.uncoalResps(0)(1).bits.data.expect(0x89abcdefL.U)
|
||||||
c.io.uncoalResps(2).bits.source.expect(0.U)
|
c.io.uncoalResps(0)(1).bits.source.expect(0.U)
|
||||||
}
|
c.io.uncoalResps(2)(0).bits.data.expect(0x5ca1ab1eL.U)
|
||||||
|
c.io.uncoalResps(2)(0).bits.source.expect(0.U)
|
||||||
|
c.io.uncoalResps(2)(1).bits.data.expect(0x01234567L.U)
|
||||||
|
c.io.uncoalResps(2)(1).bits.source.expect(0.U)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
||||||
behavior of "inflight coalesced request table"
|
behavior of "inflight coalesced request table"
|
||||||
val numLanes = 4
|
val numLanes = 4
|
||||||
|
val numPerLaneReqs = 2
|
||||||
val sourceWidth = 2
|
val sourceWidth = 2
|
||||||
val entries = 4
|
val entries = 4
|
||||||
|
|
||||||
@@ -91,7 +108,7 @@ class CoalInflightTableUnitTest extends AnyFlatSpec with ChiselScalatestTester {
|
|||||||
val sizeBits = 2
|
val sizeBits = 2
|
||||||
|
|
||||||
val inflightCoalReqTableEntry =
|
val inflightCoalReqTableEntry =
|
||||||
new InflightCoalReqTableEntry(numLanes, sourceWidth, offsetBits, sizeBits)
|
new InflightCoalReqTableEntry(numLanes, numPerLaneReqs, sourceWidth, offsetBits, sizeBits)
|
||||||
|
|
||||||
// it should "stop enqueueing when full" in {
|
// it should "stop enqueueing when full" in {
|
||||||
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c =>
|
// test(new InflightCoalReqTable(numLanes, sourceWidth, entries)) { c =>
|
||||||
|
|||||||
Reference in New Issue
Block a user