diff --git a/src/main/scala/tile/VortexTile.scala b/src/main/scala/tile/VortexTile.scala
index 7378475..ae59c33 100644
--- a/src/main/scala/tile/VortexTile.scala
+++ b/src/main/scala/tile/VortexTile.scala
@@ -388,11 +388,6 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
     outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq
     imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d
 
-    // Since the individual per-lane TL requests might come back out-of-sync between
-    // the lanes, but Vortex core expects the per-lane responses to be synced,
-    // we need to selectively fire responses that have the same source, and
-    // delay others.  Below is the logic that implements this.
-
     // @perf: this would duplicate SourceGenerator table for every lane and eat
     // up some area
     val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
@@ -405,7 +400,25 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
       ))
     }
 
-    // choose one source out of the arriving per-lane TL D channels
+    // Since the individual per-lane TL requests might come back out-of-sync between
+    // the lanes, but Vortex core expects the per-lane responses to be synced,
+    // we need to selectively fire responses that have the same source, and
+    // delay others.
+    //
+    // In order to do that, we pick a source from one of the valid lanes using e.g.
+    // an arbiter.  Then using the chosen source id, we
+    // - lie to core that response is not valid if source doesn't match picked, and
+    // - lie to downstream that core is not ready if source doesn't match picked.
+    //
+    // Note that we cannot do this filtering logic using TileLink source ID, because
+    // we're allocating source for each lane independently.  In that case, it's
+    // possible that lane 0's source matches lane 1/2/3's source by chance,
+    // even when they originated from different warps.  Using Vortex's dcache req tag
+    // solves this issue because they use a UUID that is unique across all requests
+    // in the program.
+    //
+    // TODO: A cleaner solution would be to simply do a synchronized allocation
+    // of a same source id for all lanes.
     val arb = Module(
       new RRArbiter(core.io.dmem.get.head.d.bits.source.cloneType, outer.numLanes)
     )
@@ -430,9 +443,6 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
       tlAdapter.io.inReq <> coreMem.a
       coreMem.d <> tlAdapter.io.inResp
     }
-    // using the chosen source id,
-    // - lie to core that response is not valid if source doesn't match picked
-    // - lie to downstream that core is not ready if source doesn't match picked
     (core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach {
       case ((coreMem, tlAdapter), i) =>
       coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i)