pre-merge

This commit is contained in:
Richard Yan
2023-04-22 21:28:38 -07:00
parent 8715bede37
commit b895d304bb

View File

@@ -71,30 +71,38 @@ class RespQueueEntry(sourceWidth: Int, sizeWidth: Int, maxSize: Int) extends Bun
} }
class MonoCoalescer(size: Int, window: Seq[CoalShiftQueue]) extends Module { class MonoCoalescer[QueueT: CoalShiftQueue](coalSize: Int, coalWindow: Seq[QueueT]) extends Module {
// constructor: size, window
// inputs: none
// outputs: leader idx, base addr, match OH, match count, coverage hits
val io = IO(new Bundle { val io = IO(new Bundle {
val leader_idx = Output(UInt(log2Ceil(CoalescerConsts.NUM_LANES).W)), val leader_idx = Output(UInt(log2Ceil(CoalescerConsts.NUM_LANES).W))
val base_addr = Output(UInt(CoalescerConsts.ADDR_WIDTH.W)), val base_addr = Output(UInt(CoalescerConsts.ADDR_WIDTH.W))
val match_oh = Output(Vec(NUM_LANES, UInt(CoalescerConsts.DEPTH.W))), val match_oh = Output(Vec(CoalescerConsts.NUM_LANES, UInt(CoalescerConsts.DEPTH.W)))
val coverage_hits = Output(UInt((1 << CoalescerConsts.MAX_SIZE).W)) val coverage_hits = Output(UInt((1 << CoalescerConsts.MAX_SIZE).W))
}) })
// def can_match(req0: ReqQueueEntry, req1: ReqQueueEntry): Bool val size = coalSize
val mask = ((1 << CoalescerConsts.ADDR_WIDTH - 1) - (1 << size - 1)).U
val window = coalWindow
def can_match(req0: Valid[ReqQueueEntry], req1: Valid[ReqQueueEntry]): Bool = {
(req0.bits.op === req1.bits.op) &&
(req0.valid && req1.valid) &&
((req0.bits.address & this.mask) === (req1.bits.address & this.mask))
}
// combinational logic to drive output from window contents // combinational logic to drive output from window contents
leaders = coalWindow.map(_.head)
} }
class MultiCoalescer(sizes: Seq[Int], window: Seq[CoalShiftQueue], reqQueueEntryT: ReqQueueEntry) extends Module { class MultiCoalescer[QueueT: CoalShiftQueue]
// constructor: sizes, window: Seq[CoalShiftQueue] (sizes: Seq[Int], window: Seq[QueueT], reqQueueEntryT: ReqQueueEntry) extends Module {
// instantiate MonoCoalescers
val coalescers = sizes.map(size => Module(new MonoCoalescer(size, window)))
// inputs: none // inputs: none
// outputs: out_req: Valid(ReqQueueEntry), invalidate: Valid(Seq[UInt(LOGDEPTH.W)]) // outputs: out_req: Valid(ReqQueueEntry), invalidate: Valid(Seq[UInt(LOGDEPTH.W)])
val io = IO(new Bundle { val io = IO(new Bundle {
val out_req = Output(Valid(reqQueueEntryT.cloneType)), val out_req = Output(Valid(reqQueueEntryT.cloneType))
val invalidate = Output(Valid(UInt(log2Ceil(LOGDEPTH.W)))), val invalidate = Output(Valid(UInt(log2Ceil(CoalescerConsts.LOGDEPTH.W))))
}) })
} }
@@ -180,78 +188,78 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
assert(~tlIn.a.valid || (tlIn.a.bits.opcode === OpCode.Get || tlIn.a.bits.opcode === OpCode.PutFullData || assert(~tlIn.a.valid || (tlIn.a.bits.opcode === OpCode.Get || tlIn.a.bits.opcode === OpCode.PutFullData ||
tlIn.a.bits.opcode === OpCode.PutPartialData), "Coalescer input has unsupported TL opcode"); tlIn.a.bits.opcode === OpCode.PutPartialData), "Coalescer input has unsupported TL opcode");
req.op := tlIn.a.bits.opcode === OpCode.Get ? 0.U : 1.U req.op := tlIn.a.bits.opcode === OpCode.Get ? 0.U : 1.U
req.source := tlIn.a.bits.source req.source := tlIn.a.bits.source
req.address := tlIn.a.bits.address req.address := tlIn.a.bits.address
req.data := tlIn.a.bits.data req.data := tlIn.a.bits.data
req.size := tlIn.a.bits.size req.size := tlIn.a.bits.size
reqQueue.io.enq.valid := tlIn.a.valid reqQueue.io.enq.valid := tlIn.a.valid
reqQueue.io.enq.bits := req reqQueue.io.enq.bits := req
// TODO: deq.ready should respect downstream ready // TODO: deq.ready should respect downstream ready
reqQueue.io.deq.ready := true.B reqQueue.io.deq.ready := true.B
reqQueue.io.invalidate.bits := 0.U // TODO reqQueue.io.invalidate.bits := 0.U // TODO
reqQueue.io.invalidate.valid := false.B // TODO reqQueue.io.invalidate.valid := false.B // TODO
printf(s"reqQueue(${lane}).count=%d\n", reqQueue.io.count) printf(s"reqQueue(${lane}).count=%d\n", reqQueue.io.count)
val reqHead = reqQueue.io.deq.bits val reqHead = reqQueue.io.deq.bits
// FIXME: generate Get or Put according to read/write // FIXME: generate Get or Put according to read/write
val (reqLegal, reqBits) = edgeOut.Get( val (reqLegal, reqBits) = edgeOut.Get(
fromSource = reqHead.source, fromSource = reqHead.source,
// `toAddress` should be aligned to 2**lgSize // `toAddress` should be aligned to 2**lgSize
toAddress = reqHead.address, toAddress = reqHead.address,
lgSize = 0.U lgSize = 0.U
) )
assert(reqLegal, "unhandled illegal TL req gen") assert(reqLegal, "unhandled illegal TL req gen")
tlOut.a.bits := reqBits // TODO: this is incorrect, this does not take iinto account of queue tlOut.a.bits := reqBits // TODO: this is incorrect, this does not take iinto account of queue
tlOut.a.valid := reqQueue.io.deq.valid tlOut.a.valid := reqQueue.io.deq.valid
// Response queue // Response queue
// //
// This queue will serialize non-coalesced responses along with // This queue will serialize non-coalesced responses along with
// coalesced responses and serve them back to the core side. // coalesced responses and serve them back to the core side.
val respQueue = respQueues(lane) val respQueue = respQueues(lane)
val resp = Wire(respQueueEntryT) val resp = Wire(respQueueEntryT)
resp.source := tlOut.d.bits.source resp.source := tlOut.d.bits.source
resp.data := tlOut.d.bits.data resp.data := tlOut.d.bits.data
// TODO: read/write bit? // TODO: read/write bit?
// Queue up responses that didn't get coalesced originally ("noncoalesced" responses). // Queue up responses that didn't get coalesced originally ("noncoalesced" responses).
// Coalesced (but uncoalesced back) responses will also be enqueued into the same queue. // Coalesced (but uncoalesced back) responses will also be enqueued into the same queue.
assert( assert(
respQueue.io.enq(respQueueNoncoalPort).ready, respQueue.io.enq(respQueueNoncoalPort).ready,
"respQueue: enq port for noncoalesced response is blocked" "respQueue: enq port for noncoalesced response is blocked"
) )
respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid respQueue.io.enq(respQueueNoncoalPort).valid := tlOut.d.valid
respQueue.io.enq(respQueueNoncoalPort).bits := resp respQueue.io.enq(respQueueNoncoalPort).bits := resp
// TODO: deq.ready should respect upstream ready // TODO: deq.ready should respect upstream ready
respQueue.io.deq(respQueueNoncoalPort).ready := true.B respQueue.io.deq(respQueueNoncoalPort).ready := true.B
tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid tlIn.d.valid := respQueue.io.deq(respQueueNoncoalPort).valid
val respHead = respQueue.io.deq(respQueueNoncoalPort).bits val respHead = respQueue.io.deq(respQueueNoncoalPort).bits
val respBits = edgeIn.AccessAck( val respBits = edgeIn.AccessAck(
toSource = respHead.source, toSource = respHead.source,
lgSize = 0.U, lgSize = 0.U,
data = respHead.data data = respHead.data
) )
tlIn.d.bits := respBits tlIn.d.bits := respBits
// Debug only // Debug only
val inflightCounter = RegInit(UInt(32.W), 0.U) val inflightCounter = RegInit(UInt(32.W), 0.U)
when(tlOut.a.valid) { when(tlOut.a.valid) {
// don't inc/dec on simultaneous req/resp // don't inc/dec on simultaneous req/resp
when(!tlOut.d.valid) { when(!tlOut.d.valid) {
inflightCounter := inflightCounter + 1.U inflightCounter := inflightCounter + 1.U
}
}.elsewhen(tlOut.d.valid) {
inflightCounter := inflightCounter - 1.U
} }
}.elsewhen(tlOut.d.valid) {
inflightCounter := inflightCounter - 1.U
}
dontTouch(inflightCounter) dontTouch(inflightCounter)
dontTouch(tlIn.a) dontTouch(tlIn.a)
dontTouch(tlIn.d) dontTouch(tlIn.d)
dontTouch(tlOut.a) dontTouch(tlOut.a)
dontTouch(tlOut.d) dontTouch(tlOut.d)
} }
// Generate coalesced requests // Generate coalesced requests
@@ -340,7 +348,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
) )
q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
// dontTouch(q.io.enq(respQueueCoalPortOffset)) // dontTouch(q.io.enq(respQueueCoalPortOffset))
} }
} }
@@ -355,12 +363,12 @@ class CoalescingUnitImp(outer: CoalescingUnit, numLanes: Int) extends LazyModule
} }
class UncoalescingUnit( class UncoalescingUnit(
val numLanes: Int, val numLanes: Int,
val numPerLaneReqs: Int, val numPerLaneReqs: Int,
val sourceWidth: Int, val sourceWidth: Int,
val coalDataWidth: Int, val coalDataWidth: Int,
val numInflightCoalRequests: Int val numInflightCoalRequests: Int
) extends Module { ) extends Module {
val inflightTable = Module( val inflightTable = Module(
new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests) new InflightCoalReqTable(numLanes, numPerLaneReqs, sourceWidth, numInflightCoalRequests)
) )
@@ -438,11 +446,11 @@ class UncoalescingUnit(
// split the coalesced response back to individual per-lane responses with the // split the coalesced response back to individual per-lane responses with the
// right metadata. // right metadata.
class InflightCoalReqTable( class InflightCoalReqTable(
val numLanes: Int, val numLanes: Int,
val numPerLaneReqs: Int, val numPerLaneReqs: Int,
val sourceWidth: Int, val sourceWidth: Int,
val entries: Int val entries: Int
) extends Module { ) extends Module {
val offsetBits = 4 // FIXME hardcoded val offsetBits = 4 // FIXME hardcoded
val sizeBits = 2 // FIXME hardcoded val sizeBits = 2 // FIXME hardcoded
val entryT = val entryT =
@@ -516,13 +524,13 @@ class InflightCoalReqTable(
} }
class InflightCoalReqTableEntry( class InflightCoalReqTableEntry(
val numLanes: Int, val numLanes: Int,
// Maximum number of requests from a single lane that can get coalesced into a single request // Maximum number of requests from a single lane that can get coalesced into a single request
val numPerLaneReqs: Int, val numPerLaneReqs: Int,
val sourceWidth: Int, val sourceWidth: Int,
val offsetBits: Int, val offsetBits: Int,
val sizeBits: Int val sizeBits: Int
) extends Bundle { ) extends Bundle {
class CoreReq extends Bundle { class CoreReq extends Bundle {
val valid = Bool() val valid = Bool()
val offset = UInt(offsetBits.W) val offset = UInt(offsetBits.W)
@@ -543,17 +551,16 @@ class InflightCoalReqTableEntry(
// Initially copied from freechips.rocketchip.util.ShiftQueue. // Initially copied from freechips.rocketchip.util.ShiftQueue.
// If `pipe` is true, support enqueueing to a full queue when also dequeueing. // If `pipe` is true, support enqueueing to a full queue when also dequeueing.
class CoalShiftQueue[T <: Data]( class CoalShiftQueue[T <: Data](
gen: T, gen: T,
val entries: Int, val entries: Int,
pipe: Boolean = true, pipe: Boolean = true,
flow: Boolean = false flow: Boolean = false
) extends Module { ) extends Module {
val io = IO(new Bundle { val io = IO(new Bundle {
val queue = new QueueIO(gen, entries) { val queue = new QueueIO(gen, entries)
val invalidate = Input(Valid(UInt(entries.W))) val invalidate = Input(Valid(UInt(entries.W)))
val mask = Output(UInt(entries.W)) val mask = Output(UInt(entries.W))
val elts = Output(Vec(entries, gen)) val elts = Output(Vec(entries, gen))
}
}) })
private val valid = RegInit(VecInit(Seq.fill(entries) { false.B })) private val valid = RegInit(VecInit(Seq.fill(entries) { false.B }))
@@ -576,56 +583,56 @@ class CoalShiftQueue[T <: Data](
if (i == -1) true.B else if (i == entries) false.B else mask(i) if (i == -1) true.B else if (i == entries) false.B else mask(i)
} }
def paddedUsed = pad({ i: Int => used(i) }) def paddedUsed = pad({ i: Int => used(i) })
def validAfterInv(i: Int) = valid(i) && !io.invalidate(i) def validAfterInv(i: Int) = valid(i) && !io.invalidate.bits(i)
val shift = io.deq.ready || (used =/= 0.U) && !validAfterInv(0) val shift = io.queue.deq.ready || (used =/= 0.U) && !validAfterInv(0)
for (i <- 0 until entries) { for (i <- 0 until entries) {
val wdata = if (i == entries - 1) io.enq.bits else Mux(!used(i + 1), io.enq.bits, elts(i + 1)) val wdata = if (i == entries - 1) io.queue.enq.bits else Mux(!used(i + 1), io.queue.enq.bits, elts(i + 1))
val wen = Mux( val wen = Mux(
shift, shift,
(io.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1), (io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
// enqueue to the first empty slot above the top // enqueue to the first empty slot above the top
(io.enq.fire && paddedUsed(i - 1) && !used(i)) || !validAfterInv(i) (io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || !validAfterInv(i)
) )
when(wen) { elts(i) := wdata } when(wen) { elts(i) := wdata }
valid(i) := Mux( valid(i) := Mux(
shift, shift,
(io.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1), (io.queue.enq.fire && !paddedUsed(i + 1) && used(i)) || pad(validAfterInv)(i + 1),
(io.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i) (io.queue.enq.fire && paddedUsed(i - 1) && !used(i)) || validAfterInv(i)
) )
} }
when(io.enq.fire) { when(io.queue.enq.fire) {
when(!io.deq.fire) { when(!io.queue.deq.fire) {
used := (used << 1.U) | 1.U used := (used << 1.U) | 1.U
} }
}.elsewhen(io.deq.fire) { }.elsewhen(io.queue.deq.fire) {
used := used >> 1.U used := used >> 1.U
} }
io.enq.ready := !valid(entries - 1) io.queue.enq.ready := !valid(entries - 1)
// We don't want to invalidate deq.valid response right away even when // We don't want to invalidate deq.valid response right away even when
// io.invalidate(head) is true. // io.invalidate(head) is true.
// Coalescing unit consumes queue head's validity, and produces its new // Coalescing unit consumes queue head's validity, and produces its new
// validity. Deasserting deq.valid right away will result in a combinational // validity. Deasserting deq.valid right away will result in a combinational
// cycle. // cycle.
io.deq.valid := valid(0) io.queue.deq.valid := valid(0)
io.deq.bits := elts.head io.queue.deq.bits := elts.head
assert(!flow, "flow-through is not implemented") assert(!flow, "flow-through is not implemented")
if (flow) { if (flow) {
when(io.enq.valid) { io.deq.valid := true.B } when(io.queue.enq.valid) { io.queue.deq.valid := true.B }
when(!valid(0)) { io.deq.bits := io.enq.bits } when(!valid(0)) { io.queue.deq.bits := io.queue.enq.bits }
} }
if (pipe) { if (pipe) {
when(io.deq.ready) { io.enq.ready := true.B } when(io.queue.deq.ready) { io.queue.enq.ready := true.B }
} }
io.mask := valid.asUInt io.mask := valid.asUInt
io.elts := elts io.elts := elts
io.count := PopCount(io.mask) io.queue.count := PopCount(io.mask)
} }
class MemTraceDriver(numLanes: Int = 4, traceFile : String = "vecadd.core1.thread4.trace")(implicit p: Parameters) extends LazyModule { class MemTraceDriver(numLanes: Int = 4, traceFile : String = "vecadd.core1.thread4.trace")(implicit p: Parameters) extends LazyModule {
@@ -659,7 +666,7 @@ class TraceReq extends Bundle {
} }
class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String) class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String)
extends LazyModuleImp(outer) extends LazyModuleImp(outer)
with UnitTestModule { with UnitTestModule {
val sim = Module( val sim = Module(
new SimMemTrace(traceFile, numLanes) new SimMemTrace(traceFile, numLanes)
@@ -737,9 +744,9 @@ class MemTraceDriverImp(outer: MemTraceDriver, numLanes: Int, traceFile : String
} }
class SimMemTrace(filename: String, numLanes: Int) class SimMemTrace(filename: String, numLanes: Int)
extends BlackBox( extends BlackBox(
Map("FILENAME" -> filename, "NUM_LANES" -> numLanes) Map("FILENAME" -> filename, "NUM_LANES" -> numLanes)
) )
with HasBlackBoxResource { with HasBlackBoxResource {
val io = IO(new Bundle { val io = IO(new Bundle {
val clock = Input(Clock()) val clock = Input(Clock())