diff --git a/src/main/scala/tilelink/Coalescing.scala b/src/main/scala/tilelink/Coalescing.scala
index 110cc04..01b0ff0 100644
--- a/src/main/scala/tilelink/Coalescing.scala
+++ b/src/main/scala/tilelink/Coalescing.scala
@@ -220,11 +220,7 @@ class CoalShiftQueue[T <: Data](
   }
 
   io.queue.enq.ready := !valid(entries - 1)
-  // We don't want to invalidate deq.valid response right away even when
-  // io.invalidate(head) is true.
-  // Coalescing unit consumes queue head's validity, and produces its new
-  // validity.  Deasserting deq.valid right away will result in a combinational
-  // cycle.
+  // TODO: making this validAfterInv(0) might be useful for the arbiter
   io.queue.deq.valid := valid(0)
   io.queue.deq.bits := elts.head
 
@@ -370,12 +366,11 @@ class MonoCoalescer(coalLogSize: Int, windowT: CoalShiftQueue[ReqQueueEntry],
 // Software model: coalescer.py
 class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueEntry,
                      config: CoalescerConfig) extends Module {
-
   val io = IO(new Bundle {
     // coalescing window, connected to the contents of the request queues
     val window = Input(Vec(config.numLanes, windowT.io.cloneType))
-    // newly generated coalesced request
-    val outReq = DecoupledIO(coalReqT.cloneType)
+    // generated coalesced request
+    val coalReq = DecoupledIO(coalReqT.cloneType)
     // invalidate signals going into each request queue's head
     val invalidate = Output(Valid(Vec(config.numLanes, UInt(config.queueDepth.W))))
   })
@@ -467,20 +462,20 @@ class MultiCoalescer(windowT: CoalShiftQueue[ReqQueueEntry], coalReqT: ReqQueueE
   }
 
   val sourceGen = Module(new ReqSourceGen(log2Ceil(config.numNewSrcIds)))
-  sourceGen.io.gen := io.outReq.fire // use up a source ID only when request is created
+  sourceGen.io.gen := io.coalReq.fire // use up a source ID only when request is created
 
   val coalesceValid = chosenValid && sourceGen.io.id.valid
 
-  io.outReq.bits.source := sourceGen.io.id.bits
-  io.outReq.bits.mask := mask.asUInt
-  io.outReq.bits.data := data.asUInt
-  io.outReq.bits.size := chosenSize
-  io.outReq.bits.address := chosenBundle.baseAddr
-  io.outReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
-  io.outReq.valid := coalesceValid
+  io.coalReq.bits.source := sourceGen.io.id.bits
+  io.coalReq.bits.mask := mask.asUInt
+  io.coalReq.bits.data := data.asUInt
+  io.coalReq.bits.size := chosenSize
+  io.coalReq.bits.address := chosenBundle.baseAddr
+  io.coalReq.bits.op := VecInit(io.window.map(_.elts.head))(chosenBundle.leaderIdx).op
+  io.coalReq.valid := coalesceValid
 
   io.invalidate.bits := chosenBundle.matchOH
-  io.invalidate.valid := io.outReq.fire // invalidate only when fire
+  io.invalidate.valid := io.coalReq.fire // invalidate only when fire
 
   dontTouch(io.invalidate) // debug
 
@@ -547,6 +542,8 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
       reqQueue.io.queue.enq.bits := req
       // TODO: deq.ready should respect downstream ready
       reqQueue.io.queue.deq.ready := true.B
+      // invalidate queue entries that contain original core requests that got
+      // coalesced into a wider one
       reqQueue.io.invalidate.bits := coalescer.io.invalidate.bits(lane)
       reqQueue.io.invalidate.valid := coalescer.io.invalidate.valid
 
@@ -556,9 +553,9 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
 
   val (tlCoal, edgeCoal) = outer.coalescerNode.out(0)
 
-  tlCoal.a.valid := coalescer.io.outReq.valid
-  tlCoal.a.bits := coalescer.io.outReq.bits.toTLA(edgeCoal)
-  coalescer.io.outReq.ready := tlCoal.a.ready
+  tlCoal.a.valid := coalescer.io.coalReq.valid
+  tlCoal.a.bits := coalescer.io.coalReq.bits.toTLA(edgeCoal)
+  coalescer.io.coalReq.ready := tlCoal.a.ready
   tlCoal.b.ready := true.B
   tlCoal.c.valid := false.B
   // tlCoal.d.ready := true.B // this should be connected to uncoalescer's ready, done below.
@@ -692,36 +689,50 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
   )
   println(s"=========== table sourceWidth: ${sourceWidth}")
   // println(s"=========== table sizeEnumBits: ${newEntry.sizeEnumBits}")
-  newEntry.source := coalescer.io.outReq.bits.source
+  newEntry.source := coalescer.io.coalReq.bits.source
 
   // TODO: richard to write table fill logic
   assert (config.maxCoalLogSize <= config.dataBusWidth,
     "multi-beat coalesced reads/writes are currently not supported")
   assert (
     tlCoal.params.dataBits == (1 << config.dataBusWidth) * 8,
-    s"tlCoal param dataBits (${tlCoal.params.dataBits}) mismatch coalescer constant"
+    s"tlCoal param `dataBits` (${tlCoal.params.dataBits}) mismatches coalescer constant"
     + s" (${(1 << config.dataBusWidth) * 8})"
   )
-  val origReqs = reqQueues.map(q => q.io.queue.deq.bits)
-  newEntry.lanes.foreach { l =>
-    l.reqs.zipWithIndex.foreach { case (r, i) =>
-      // TODO: this part needs the actual coalescing logic to work
-      r.valid := false.B
-      r.source := origReqs(i).source
-      r.offset := (origReqs(i).address % (1 << config.maxCoalLogSize).U) >> config.wordWidth
-      r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size)
+  val reqQueueHeads = reqQueues.map(q => q.io.queue.deq.bits)
+  // newEntry.lanes.foreach { l =>
+  //   l.reqs.zipWithIndex.foreach { case (r, i) =>
+  //     // TODO: this part needs the actual coalescing logic to work
+  //     r.valid := false.B
+  //     r.source := origReqs(i).source
+  //     r.offset := (origReqs(i).address % (1 << config.maxCoalLogSize).U) >> config.wordWidth
+  //     r.sizeEnum := config.sizeEnum.logSizeToEnum(origReqs(i).size)
+  //   }
+  // }
+  // newEntry.lanes(0).reqs(0).valid := true.B
+  // newEntry.lanes(1).reqs(0).valid := true.B
+  // newEntry.lanes(2).reqs(0).valid := true.B
+  // newEntry.lanes(3).reqs(0).valid := true.B
+  (newEntry.lanes zip coalescer.io.invalidate.bits).zipWithIndex
+    .foreach { case ((laneEntry, laneInv), lane) =>
+      (laneEntry.reqs zip laneInv.asBools).foreach { case (reqEntry, inv) =>
+        // TODO: this part needs the actual coalescing logic to work
+        reqEntry.valid := inv
+        when (inv) {
+          printf(s"entry for reqQueue(${lane}) got invalidated\n")
+        }
+        // FIXME: copying over queue heads out of laziness
+        reqEntry.source := reqQueueHeads(lane).source
+        reqEntry.offset := (reqQueueHeads(lane).address % (1 << config.maxCoalLogSize).U) >> config.wordWidth
+        reqEntry.sizeEnum := config.sizeEnum.logSizeToEnum(reqQueueHeads(lane).size)
+      }
     }
-  }
-  newEntry.lanes(0).reqs(0).valid := true.B
-  newEntry.lanes(1).reqs(0).valid := true.B
-  newEntry.lanes(2).reqs(0).valid := true.B
-  newEntry.lanes(3).reqs(0).valid := true.B
   dontTouch(newEntry)
 
   // Uncoalescer module uncoalesces responses back to each lane
   val uncoalescer = Module(new UncoalescingUnit(config))
 
-  uncoalescer.io.coalReqValid := coalescer.io.outReq.valid
+  uncoalescer.io.coalReqValid := coalescer.io.coalReq.valid
   uncoalescer.io.newEntry := newEntry
   // Cleanup: custom <>?
   uncoalescer.io.coalResp.valid := tlCoal.d.valid
@@ -730,13 +741,13 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
   tlCoal.d.ready := uncoalescer.io.coalResp.ready
 
   // Queue up synthesized uncoalesced responses into each lane's response queue
-  (respQueues zip uncoalescer.io.uncoalResps).foreach { case (q, lanes) =>
-    lanes.zipWithIndex.foreach { case (resp, i) =>
+  (respQueues zip uncoalescer.io.uncoalResps).zipWithIndex.foreach { case ((q, perLaneResps), lane) =>
+    perLaneResps.zipWithIndex.foreach { case (resp, i) =>
       // TODO: rather than crashing, deassert tlOut.d.ready to stall downtream
       // cache.  This should ideally not happen though.
       assert(
         q.io.enq(respQueueCoalPortOffset + i).ready,
-        s"respQueue: enq port for 0-th coalesced response is blocked"
+        s"respQueue: enq port for coalesced response is blocked for lane ${lane}"
       )
       q.io.enq(respQueueCoalPortOffset + i).valid := resp.valid
       q.io.enq(respQueueCoalPortOffset + i).bits := resp.bits
@@ -745,7 +756,7 @@ class CoalescingUnitImp(outer: CoalescingUnit, config: CoalescerConfig) extends
   }
 
   // Debug
-  dontTouch(coalescer.io.outReq)
+  dontTouch(coalescer.io.coalReq)
   val coalRespData = tlCoal.d.bits.data
   dontTouch(coalRespData)