Add EmulatorTile
also split core-specific config keys from radiance.memory to radiance.core.
This commit is contained in:
31
src/main/resources/csrc/SimEmulator.cc
Normal file
31
src/main/resources/csrc/SimEmulator.cc
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#ifndef NO_VPI
|
||||||
|
#include <vpi_user.h>
|
||||||
|
#include <svdpi.h>
|
||||||
|
#endif
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Entry points that this shim forwards to. They are only declared here; the
// definitions live outside this file (presumably in the Rust emulator
// library, judging by the `_rs` suffix -- confirm against the build).
extern "C" {

// Receives the lane count handed to emulator_init().
void emulator_init_rs(int num_lanes);

// Per-call handshake between the simulator and the emulator.
// The vec_* pointers are per-lane arrays; `inflight` is passed by value and
// `finished` is written through a pointer.
void emulator_generate_rs(uint8_t *vec_a_ready,
                          uint8_t *vec_a_valid,
                          long long *vec_a_address,
                          uint8_t *vec_a_is_store,
                          int *vec_a_size,
                          long long *vec_a_data,
                          uint8_t *vec_d_ready,
                          uint8_t *vec_d_valid,
                          uint8_t *vec_d_is_store,
                          int *vec_d_size,
                          uint8_t inflight,
                          uint8_t *finished);

} // extern "C"
|
||||||
|
|
||||||
|
// DPI-C entry point called from SimEmulator.v's `initial` block.
// Pure pass-through to emulator_init_rs(). The argument type here has to
// agree with the `import "DPI-C"` declaration on the SystemVerilog side.
extern "C" void emulator_init(int num_lanes)
{
  return emulator_init_rs(num_lanes);
}
|
||||||
|
|
||||||
|
// DPI-C entry point called from SimEmulator.v on every negative clock edge
// while reset is deasserted. Forwards every argument, unchanged and in the
// same order, to emulator_generate_rs().
//
// Keep the parameter list in sync with the `import "DPI-C"` declaration and
// with the call site in SimEmulator.v.
extern "C" void emulator_generate(uint8_t *vec_a_ready,
                                  uint8_t *vec_a_valid,
                                  long long *vec_a_address,
                                  uint8_t *vec_a_is_store,
                                  int *vec_a_size,
                                  long long *vec_a_data,
                                  uint8_t *vec_d_ready,
                                  uint8_t *vec_d_valid,
                                  uint8_t *vec_d_is_store,
                                  int *vec_d_size,
                                  uint8_t inflight,
                                  uint8_t *finished)
{
  return emulator_generate_rs(
      // request (A) side
      vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store, vec_a_size,
      vec_a_data,
      // response (D) side
      vec_d_ready, vec_d_valid, vec_d_is_store, vec_d_size,
      // global status
      inflight, finished);
}
|
||||||
132
src/main/resources/vsrc/SimEmulator.v
Normal file
132
src/main/resources/vsrc/SimEmulator.v
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
`include "SimDefaults.vh"
|
||||||
|
|
||||||
|
// Calls the C shim emulator_init() in SimEmulator.cc, which forwards the lane
// count to emulator_init_rs().
// The argument is declared `int` (32-bit) because the C definition is
// `void emulator_init(int num_lanes)`; DPI maps `longint` to a 64-bit C type,
// which would not match that signature.
import "DPI-C" function void emulator_init(
  input int num_lanes
);
|
||||||
|
|
||||||
|
// Make sure to sync the parameters for:
|
||||||
|
// (1) import "DPI-C" declaration
|
||||||
|
// (2) C function declaration
|
||||||
|
// (3) DPI function calls inside initial/always blocks
|
||||||
|
import "DPI-C" function void emulator_generate(
  // Request (A) side: the simulator supplies ready, the C side returns the
  // rest.
  input  bit     vec_a_ready    [`MAX_NUM_LANES],
  output bit     vec_a_valid    [`MAX_NUM_LANES],
  output longint vec_a_address  [`MAX_NUM_LANES],
  output bit     vec_a_is_store [`MAX_NUM_LANES],
  output int     vec_a_size     [`MAX_NUM_LANES],
  output longint vec_a_data     [`MAX_NUM_LANES],

  // Response (D) side: the C side returns ready, the simulator supplies the
  // rest.
  output bit     vec_d_ready    [`MAX_NUM_LANES],
  input  bit     vec_d_valid    [`MAX_NUM_LANES],
  input  bit     vec_d_is_store [`MAX_NUM_LANES],
  input  int     vec_d_size     [`MAX_NUM_LANES],

  // Scalars, not per-lane.
  input  bit     inflight,
  output bit     finished
);
|
||||||
|
|
||||||
|
// SimEmulator: connects flattened per-lane request (a_*) and response (d_*)
// ports to the DPI-C functions emulator_init() and emulator_generate().
// Each multi-bit per-lane field occupies one fixed-width slice of a flat
// vector, lane 0 in the least-significant slice.
module SimEmulator #(parameter NUM_LANES = 4) (
  input clock,
  input reset,

  input  [NUM_LANES-1:0]                       a_ready,
  output [NUM_LANES-1:0]                       a_valid,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_address,
  output [NUM_LANES-1:0]                       a_is_store,
  output [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] a_size,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_data,

  output [NUM_LANES-1:0]                       d_ready,
  input  [NUM_LANES-1:0]                       d_valid,
  input  [NUM_LANES-1:0]                       d_is_store,
  input  [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] d_size,
  // TODO: d_mask
  // TODO: d_data

  input  inflight,
  output finished
);
  // Naming: "__in_*" travels C -> Verilog, "__out_*" travels Verilog -> C.
  // The unpacked ranges ascend ([0:N-1]) so that element order matches C
  // array indexing. C array sizes are static, so every array is sized by
  // MAX_NUM_LANES rather than NUM_LANES.

  // Verilog -> C
  bit     __out_a_ready    [0:`MAX_NUM_LANES-1];
  bit     __out_d_valid    [0:`MAX_NUM_LANES-1];
  bit     __out_d_is_store [0:`MAX_NUM_LANES-1];
  int     __out_d_size     [0:`MAX_NUM_LANES-1];
  bit     __out_inflight;

  // C -> Verilog
  bit     __in_a_valid     [0:`MAX_NUM_LANES-1];
  longint __in_a_address   [0:`MAX_NUM_LANES-1];
  bit     __in_a_is_store  [0:`MAX_NUM_LANES-1];
  int     __in_a_size      [0:`MAX_NUM_LANES-1];
  longint __in_a_data      [0:`MAX_NUM_LANES-1];
  bit     __in_d_ready     [0:`MAX_NUM_LANES-1];
  bit     __in_finished;

  // Scalar signals need no per-lane unpacking.
  assign __out_inflight = inflight;
  assign finished       = __in_finished;

  genvar lane;
  generate
    for (lane = 0; lane < NUM_LANES; lane = lane + 1) begin
      // Module inputs -> DPI arguments.
      assign __out_a_ready[lane]    = a_ready[lane];
      assign __out_d_valid[lane]    = d_valid[lane];
      assign __out_d_is_store[lane] = d_is_store[lane];
      assign __out_d_size[lane]
        = d_size[`SIMMEM_LOGSIZE_WIDTH*lane +: `SIMMEM_LOGSIZE_WIDTH];

      // DPI results -> module outputs; wide C values are truncated to the
      // port slice width.
      assign a_valid[lane]    = __in_a_valid[lane];
      assign a_is_store[lane] = __in_a_is_store[lane];
      assign d_ready[lane]    = __in_d_ready[lane];
      assign a_address[`SIMMEM_DATA_WIDTH*lane +: `SIMMEM_DATA_WIDTH]
        = __in_a_address[lane][`SIMMEM_DATA_WIDTH-1:0];
      assign a_size[`SIMMEM_LOGSIZE_WIDTH*lane +: `SIMMEM_LOGSIZE_WIDTH]
        = __in_a_size[lane][`SIMMEM_LOGSIZE_WIDTH-1:0];
      assign a_data[`SIMMEM_DATA_WIDTH*lane +: `SIMMEM_DATA_WIDTH]
        = __in_a_data[lane][`SIMMEM_DATA_WIDTH-1:0];
    end
  endgenerate

  initial begin
    emulator_init(NUM_LANES);
  end

  // The negative edge matters here: the DPI call effectively acts as
  // combinational logic, so its outputs must be visible in the *current*
  // cycle rather than the next one.
  always @(negedge clock) begin
    if (reset) begin
      // Hold every DPI-driven value at zero while in reset; the DPI function
      // is not called.
      for (integer i = 0; i < NUM_LANES; i = i + 1) begin
        __in_a_valid[i]    = 1'b0;
        __in_a_is_store[i] = 1'b0;
        __in_d_ready[i]    = 1'b0;
        __in_a_size[i]     = 32'b0;
        __in_a_address[i]  = `SIMMEM_DATA_WIDTH'b0;
        __in_a_data[i]     = `SIMMEM_DATA_WIDTH'b0;
      end
      __in_finished = 1'b0;
    end else begin
      // Argument order must match the import declaration and the C
      // definition of emulator_generate().
      emulator_generate(
        __out_a_ready,
        __in_a_valid,
        __in_a_address,
        __in_a_is_store,
        __in_a_size,
        __in_a_data,

        __in_d_ready,
        __out_d_valid,
        __out_d_is_store,
        __out_d_size,

        __out_inflight,
        __in_finished
      );

      // Debug trace of what the DPI call returned for each active lane.
      for (integer i = 0; i < NUM_LANES; i = i + 1) begin
        $display("verilog: %04d a_valid[%d]=%d, a_address[%d]=0x%x, d_ready[%d]=%d",
                 $time, i, __in_a_valid[i], i, __in_a_address[i], i, __in_d_ready[i]);
      end

      // End the simulation once `finished` is observed high.
      if (finished) begin
        $finish;
      end
    end
  end
endmodule
|
||||||
243
src/main/scala/radiance/core/Emulator.scala
Normal file
243
src/main/scala/radiance/core/Emulator.scala
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
package radiance.core
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
|
||||||
|
import radiance.memory.{SourceGenerator, TraceLine, TLPrintf}
|
||||||
|
|
||||||
|
/** Shape of the SIMT core.
  *
  * @param nWarps     number of warps in the core
  * @param nCoreLanes number of SIMT threads in the core
  * @param nMemLanes  number of memory lanes in the memory interface to the
  *                   cache; relates to the LSU lanes
  * @param nSrcIds    number of source IDs allocated to each of the nMemLanes
  */
case class SIMTCoreParams(
    nWarps: Int = 4,
    nCoreLanes: Int = 4,
    nMemLanes: Int = 4,
    nSrcIds: Int = 8
)
|
||||||
|
/** Settings for the memory-trace driven core.
  *
  * @param tracefilename  trace file name; defaults to the placeholder
  *                       string "undefined"
  * @param traceHasSource flag, false by default (presumably whether trace
  *                       lines carry a source id -- confirm against the
  *                       trace reader)
  */
case class MemtraceCoreParams(
    tracefilename: String = "undefined",
    traceHasSource: Boolean = false
)
|
||||||
|
|
||||||
|
/** Config key for [[SIMTCoreParams]]; `None` unless a config fragment sets it. */
case object SIMTCoreKey
    extends Field[Option[SIMTCoreParams]](None /*default*/ )

/** Config key for [[MemtraceCoreParams]]; `None` unless a config fragment sets it. */
case object MemtraceCoreKey
    extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
||||||
|
|
||||||
|
// #############################################################################
|
||||||
|
// FIXME: copy-paste from MemFuzzer
|
||||||
|
// #############################################################################
|
||||||
|
|
||||||
|
/** Diplomatic wrapper around the DPI-backed emulator.
  *
  * Creates one TileLink client node per lane, each owning source IDs
  * `[0, numSrcIds)`, and funnels all of them through a single identity node.
  *
  * @param numLanes        number of lanes, i.e. TileLink client nodes
  * @param numSrcIds       number of source IDs available to each lane
  * @param wordSizeInBytes word size, passed through to [[EmulatorImp]]
  */
class Emulator(
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int,
)(implicit p: Parameters)
    extends LazyModule {

  // One client node per lane, in lane order.
  val laneNodes: Seq[TLClientNode] = (0 until numLanes).map { lane =>
    val clientParam = Seq(
      TLMasterParameters.v1(
        name = s"Emulator$lane",
        sourceId = IdRange(0, numSrcIds)
        // visibility = Seq(AddressSet(0x0000, 0xffffff))
      )
    )
    TLClientNode(Seq(TLMasterPortParameters.v1(clientParam)))
  }

  // Single attachment point exposing every lane to the outside.
  val node = TLIdentityNode()
  for (laneNode <- laneNodes) {
    node := laneNode
  }

  lazy val module = new EmulatorImp(this, numLanes, numSrcIds, wordSizeInBytes)
}
|
||||||
|
|
||||||
|
/** Hardware side of [[Emulator]].
  *
  * Instantiates the `SimEmulator` black box, splits its flattened per-lane
  * ports into `TraceLine` requests and responses, and turns each lane's
  * requests into TileLink Get/Put messages on that lane's client node.
  *
  * @param outer           the enclosing [[Emulator]] lazy module
  * @param numLanes        number of lanes
  * @param numSrcIds       number of source IDs per lane; sizes each lane's
  *                        `SourceGenerator`
  * @param wordSizeInBytes requests narrower than this are widened to one
  *                        full word
  */
class EmulatorImp(
    outer: Emulator,
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int,
) extends LazyModuleImp(outer) {
  val io = IO(new Bundle {
    val finished = Output(Bool())
  })
  val sim = Module(new SimEmulator(numLanes))
  sim.io.clock := clock
  sim.io.reset := reset.asBool

  io.finished := sim.io.finished

  // connect Verilog <-> Chisel IO
  // Verilog IO flattened across all lanes
  val laneReqs = Wire(Vec(numLanes, Decoupled(new TraceLine)))
  val addrW = laneReqs(0).bits.address.getWidth
  val sizeW = laneReqs(0).bits.size.getWidth
  val dataW = laneReqs(0).bits.data.getWidth
  laneReqs.zipWithIndex.foreach { case (req, i) =>
    req.valid := sim.io.a.valid(i)
    req.bits.source := 0.U // DPI doesn't generate a source id
    req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
    req.bits.is_store := sim.io.a.is_store(i)
    req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
    req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
  }
  // This is the only driver of a.ready. It uses req.ready (assigned per lane
  // below), which also requires a free source ID, not the raw TileLink
  // a.ready.
  sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt

  val laneResps = Wire(Vec(numLanes, Flipped(Decoupled(new TraceLine))))
  laneResps.zipWithIndex.foreach { case (resp, i) =>
    resp.ready := sim.io.d.ready(i)
    // TODO: not handled in DPI
    resp.bits.source := DontCare
    resp.bits.address := DontCare
    resp.bits.data := DontCare
  }
  sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
  sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
  sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt

  // One source-ID allocator per lane.
  val sourceGens = Seq.fill(numLanes)(
    Module(
      new SourceGenerator(
        log2Ceil(numSrcIds),
        ignoreInUse = false
      )
    )
  )
  val anyInflight = sourceGens.map(_.io.inflight).reduce(_ || _)
  sim.io.inflight := anyInflight

  // Take requests off of the queue and generate TL requests
  (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
    case ((node, (req, resp)), lane) =>
      val (tlOut, edge) = node.out(0)

      // Requests --------------------------------------------------------------
      //
      // Core only makes accesses of granularity larger than a word, so we want
      // the trace driver to act so as well.
      // That means if req.size is smaller than word size, we need to pad data
      // with zeros to generate a word-size request, and set mask accordingly.
      val offsetInWord = req.bits.address % wordSizeInBytes.U
      val subword = req.bits.size < log2Ceil(wordSizeInBytes).U

      // `mask` is currently unused
      // val mask = Wire(UInt(wordSizeInBytes.W))
      val wordData = Wire(UInt((wordSizeInBytes * 8 * 2).W))
      val sizeInBytes = Wire(UInt((sizeW + 1).W))
      sizeInBytes := (1.U) << req.bits.size
      // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
      wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
      val wordAlignedAddress =
        req.bits.address & ~((1 << log2Ceil(wordSizeInBytes)) - 1).U(addrW.W)
      // Sub-word requests are widened to exactly one word. The size is
      // derived from wordSizeInBytes so it stays consistent with `subword`
      // and `wordAlignedAddress` above.
      val wordAlignedSize =
        Mux(subword, log2Ceil(wordSizeInBytes).U, req.bits.size)

      val sourceGen = sourceGens(lane)
      sourceGen.io.gen := tlOut.a.fire
      sourceGen.io.reclaim.valid := tlOut.d.fire
      sourceGen.io.reclaim.bits := tlOut.d.bits.source
      sourceGen.io.meta := DontCare

      val (plegal, pbits) = edge.Put(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize, // trace line already holds log2(size)
        // data should be aligned to beatBytes
        data =
          (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
      )
      val (glegal, gbits) = edge.Get(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize
      )
      val legal = Mux(req.bits.is_store, plegal, glegal)
      val bits = Mux(req.bits.is_store, pbits, gbits)

      // A request is only issued, and only accepted from the emulator, while
      // a source ID is available.
      tlOut.a.valid := req.valid && sourceGen.io.id.valid
      req.ready := tlOut.a.ready && sourceGen.io.id.valid

      when(tlOut.a.fire) {
        assert(legal, "illegal TL req gen")
      }
      tlOut.a.bits := bits

      // Responses -------------------------------------------------------------
      //
      tlOut.d.ready := resp.ready
      resp.valid := tlOut.d.valid
      // A response that carries no data is reported as a store response.
      resp.bits.is_store := !edge.hasData(tlOut.d.bits)
      resp.bits.size := tlOut.d.bits.size

      // Channels B, C and E are not used by this client.
      tlOut.b.ready := true.B
      tlOut.c.valid := false.B
      tlOut.e.valid := false.B

      // debug
      dontTouch(req)
      when(tlOut.a.valid) {
        printf(s"Lane ${lane}: ");
        TLPrintf(
          "Emulator",
          tlOut.a.bits.source,
          tlOut.a.bits.address,
          tlOut.a.bits.size,
          tlOut.a.bits.mask,
          req.bits.is_store,
          tlOut.a.bits.data,
          req.bits.data
        )
      }
      dontTouch(tlOut.a)
      dontTouch(tlOut.d)
  }

  // when(traceFinished && allReqReclaimed && noValidReqs) {
  //   assert(
  //     false.B,
  //     "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
  //   )
  // }
}
|
||||||
|
|
||||||
|
/** Black-box binding for the `SimEmulator` Verilog module.
  *
  * The lane count is passed as the Verilog parameter `NUM_LANES`. Field
  * widths are taken from `TraceLine`. The Verilog source, its header and the
  * C++ DPI shim are attached as resources.
  *
  * @param numLanes number of lanes; sets `NUM_LANES` and the port widths
  */
class SimEmulator(numLanes: Int)
    extends BlackBox(Map("NUM_LANES" -> numLanes))
    with HasBlackBoxResource {

  // Only used to read per-field widths.
  val traceLineT = new TraceLine
  val addrW = traceLineT.address.getWidth
  val sizeW = traceLineT.size.getWidth
  val dataW = traceLineT.data.getWidth

  val io = IO(new Bundle {
    val clock = Input(Clock())
    val reset = Input(Bool())
    val inflight = Input(Bool())
    val finished = Output(Bool())

    // Request side. One bit per lane for the flags; multi-bit fields are
    // concatenated across lanes.
    val a = new Bundle {
      val ready = Input(UInt(numLanes.W))
      val valid = Output(UInt(numLanes.W))
      // Chisel can't interface with Verilog 2D port, so flatten all lanes into
      // single wide 1D array.
      val address = Output(UInt((addrW * numLanes).W))
      val is_store = Output(UInt(numLanes.W))
      val size = Output(UInt((sizeW * numLanes).W))
      val data = Output(UInt((dataW * numLanes).W))
    }

    // Response side, flattened the same way.
    val d = new Bundle {
      val ready = Output(UInt(numLanes.W))
      val valid = Input(UInt(numLanes.W))
      val is_store = Input(UInt(numLanes.W))
      val size = Input(UInt((sizeW * numLanes).W))
    }
  })

  addResource("/vsrc/SimDefaults.vh")
  addResource("/vsrc/SimEmulator.v")
  addResource("/csrc/SimEmulator.cc")
}
|
||||||
|
|
||||||
@@ -4,6 +4,7 @@ import freechips.rocketchip.diplomacy.LazyModule
|
|||||||
import freechips.rocketchip.subsystem._
|
import freechips.rocketchip.subsystem._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
|
import radiance.core.{SIMTCoreKey, MemtraceCoreKey}
|
||||||
|
|
||||||
// TODO: possibly move to somewhere closer to CoalescingUnit
|
// TODO: possibly move to somewhere closer to CoalescingUnit
|
||||||
// TODO: separate coalescer config from CanHaveMemtraceCore
|
// TODO: separate coalescer config from CanHaveMemtraceCore
|
||||||
|
|||||||
@@ -10,25 +10,10 @@ import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
|||||||
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
||||||
import freechips.rocketchip.unittest._
|
import freechips.rocketchip.unittest._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
|
import radiance.core.{SIMTCoreParams, SIMTCoreKey}
|
||||||
|
|
||||||
// TODO: find better place for these
|
|
||||||
|
|
||||||
case class SIMTCoreParams(
|
|
||||||
nWarps: Int = 4, // # of warps in the core
|
|
||||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
|
||||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
|
||||||
// cache; relates to the LSU lanes
|
|
||||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
|
||||||
)
|
|
||||||
case class MemtraceCoreParams(
|
|
||||||
tracefilename: String = "undefined",
|
|
||||||
traceHasSource: Boolean = false
|
|
||||||
)
|
|
||||||
case class CoalXbarParam()
|
case class CoalXbarParam()
|
||||||
|
|
||||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
|
||||||
case object MemtraceCoreKey
|
|
||||||
extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
|
||||||
case object CoalescerKey
|
case object CoalescerKey
|
||||||
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
||||||
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import freechips.rocketchip.subsystem._
|
|||||||
import gemmini._
|
import gemmini._
|
||||||
import gemmini.Arithmetic.FloatArithmetic._
|
import gemmini.Arithmetic.FloatArithmetic._
|
||||||
import radiance.tile._
|
import radiance.tile._
|
||||||
|
import radiance.core._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||||
|
|
||||||
@@ -106,6 +107,44 @@ class WithRadianceCores(
|
|||||||
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Config fragment that prepends `n` emulator tiles to the subsystem.
  *
  * Tile IDs start at the current `NumTiles`. Both `NumTiles` and
  * `NumRadianceCores` advance by `n`, so the next fragment that reads
  * `NumTiles` as its ID offset does not reuse an ID handed out here.
  *
  * @param n          number of emulator tiles to add
  * @param useVxCache forwarded to `EmulatorTileParams.useVxCache`
  */
class WithEmulatorCores(
  n: Int,
  useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val emulator = EmulatorTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => EmulatorTileAttachParams(
      emulator.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // n tiles are added above, so the counters advance by n.
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||||
|
|
||||||
|
/** Config fragment that prepends `n` fuzzer tiles to the subsystem.
  *
  * Tile IDs start at the current `NumTiles`. Both `NumTiles` and
  * `NumRadianceCores` advance by `n`, so the next fragment that reads
  * `NumTiles` as its ID offset does not reuse an ID handed out here.
  *
  * @param n          number of fuzzer tiles to add
  * @param useVxCache forwarded to `FuzzerTileParams.useVxCache`
  */
class WithFuzzerCores(
  n: Int,
  useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) => {
    val prev = up(TilesLocated(InSubsystem))
    val idOffset = up(NumTiles)
    val fuzzer = FuzzerTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache)
    List.tabulate(n)(i => FuzzerTileAttachParams(
      fuzzer.copy(tileId = i + idOffset),
      RocketCrossingParams()
    )) ++ prev
  }
  // n tiles are added above, so the counters advance by n.
  case NumTiles => up(NumTiles) + n
  case NumRadianceCores => up(NumRadianceCores) + n
})
|
||||||
|
|
||||||
object RadianceGemminiDataType extends Enumeration {
|
object RadianceGemminiDataType extends Enumeration {
|
||||||
type Type = Value
|
type Type = Value
|
||||||
val FP32, FP16, BF16, Int8 = Value
|
val FP32, FP16, BF16, Int8 = Value
|
||||||
@@ -244,25 +283,6 @@ class WithRadianceFrameBuffer(baseAddress: BigInt,
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
class WithFuzzerCores(
|
|
||||||
n: Int,
|
|
||||||
useVxCache: Boolean
|
|
||||||
) extends Config((site, _, up) => {
|
|
||||||
case TilesLocated(InSubsystem) => {
|
|
||||||
val prev = up(TilesLocated(InSubsystem))
|
|
||||||
val idOffset = up(NumTiles)
|
|
||||||
val fuzzer = FuzzerTileParams(
|
|
||||||
core = VortexCoreParams(),
|
|
||||||
useVxCache = useVxCache)
|
|
||||||
List.tabulate(n)(i => FuzzerTileAttachParams(
|
|
||||||
fuzzer.copy(tileId = i + idOffset),
|
|
||||||
RocketCrossingParams()
|
|
||||||
)) ++ prev
|
|
||||||
}
|
|
||||||
case NumTiles => up(NumTiles) + 1
|
|
||||||
case NumRadianceCores => up(NumRadianceCores) + 1
|
|
||||||
})
|
|
||||||
|
|
||||||
class WithRadianceCluster(
|
class WithRadianceCluster(
|
||||||
clusterId: Int,
|
clusterId: Int,
|
||||||
location: HierarchicalLocation = InSubsystem,
|
location: HierarchicalLocation = InSubsystem,
|
||||||
|
|||||||
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
// See LICENSE.SiFive for license details.
|
||||||
|
// See LICENSE.Berkeley for license details.
|
||||||
|
|
||||||
|
package radiance.tile
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||||
|
import freechips.rocketchip.resources.SimpleDevice
|
||||||
|
import freechips.rocketchip.prci.ClockCrossingType
|
||||||
|
import freechips.rocketchip.rocket._
|
||||||
|
import freechips.rocketchip.tile._
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||||
|
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||||
|
import radiance.core._
|
||||||
|
import radiance.memory.{CoalescingUnit, CoalescerKey}
|
||||||
|
|
||||||
|
// TODO: De-duplicate between this and FuzzerTile
|
||||||
|
|
||||||
|
/** Parameters for an [[EmulatorTile]].
  *
  * The tile has no instruction cache, data cache, BTB or blocker control
  * address; all of those are `None`.
  *
  * @param core       core parameters (marked for removal, see TODO)
  * @param useVxCache flag stored on the params; not read inside this file
  * @param tileId     tile ID, also used to build `uniqueName`
  */
case class EmulatorTileParams(
    core: VortexCoreParams = VortexCoreParams(), // TODO: remove this
    useVxCache: Boolean = false,
    tileId: Int = 0,
) extends InstantiableTileParams[EmulatorTile] {

  val baseName = "radiance_emulator_tile"
  val uniqueName = s"${baseName}_$tileId"

  val clockSinkParams = ClockSinkParameters()
  val blockerCtrlAddr = None

  // No caches and no branch predictor in this tile.
  val icache = None
  val dcache = None
  val btb = None

  /** Builds the tile for these parameters. */
  def instantiate(
      crossing: HierarchicalElementCrossingParamsLike,
      lookup: LookupByHartIdImpl
  )(implicit p: Parameters): EmulatorTile =
    new EmulatorTile(this, crossing, lookup)
}
|
||||||
|
|
||||||
|
/** Pairs [[EmulatorTileParams]] with crossing parameters so that the tile can
  * be attached as a `CanAttachTile`.
  */
case class EmulatorTileAttachParams(
    tileParams: EmulatorTileParams,
    crossingParams: HierarchicalElementCrossingParamsLike
) extends CanAttachTile {
  type TileType = EmulatorTile
}
|
||||||
|
|
||||||
|
/** Tile whose memory traffic comes from an [[Emulator]].
  *
  * Lane count and source-ID count are read from `SIMTCoreKey`, which must be
  * defined. When `CoalescerKey` is defined, a `CoalescingUnit` is placed
  * between the emulator and the tile's master node; otherwise the emulator
  * node is connected directly.
  */
class EmulatorTile private (
    val EmulatorParams: EmulatorTileParams,
    crossing: ClockCrossingType,
    lookup: LookupByHartIdImpl,
    q: Parameters
) extends BaseTile(EmulatorParams, crossing, lookup, q)
    with SinksExternalInterrupts
    with SourcesExternalNotifications {

  /** Public constructor; takes the crossing type out of `crossing`. */
  def this(
      params: EmulatorTileParams,
      crossing: HierarchicalElementCrossingParamsLike,
      lookup: LookupByHartIdImpl
  )(implicit p: Parameters) =
    this(params, crossing.crossingType, lookup, p)

  val cpuDevice: SimpleDevice = new SimpleDevice("emulator", Nil)

  val intOutwardNode = None
  val slaveNode: TLInwardNode = TLIdentityNode()
  val masterNode = visibilityNode
  // val statusNode = BundleBridgeSource(() => new GroundTestStatus)

  // Elaboration fails with a requirement error if SIMTCoreKey is not set;
  // the (0, 0) fallback only satisfies the type checker.
  val (numLanes, numSrcIds) = p(SIMTCoreKey)
    .map(param => (param.nMemLanes, param.nSrcIds))
    .getOrElse {
      require(false, "emulator requires SIMTCoreKey to be defined")
      (0, 0)
    }
  // FIXME: parameterize
  val wordSizeInBytes = 4

  val emulator = LazyModule(new Emulator(numLanes, numSrcIds, wordSizeInBytes))

  // Conditionally instantiate memory coalescer
  val coalescerNode = p(CoalescerKey) match {
    case None => emulator.node
    case Some(coalParam) => {
      val coal = LazyModule(new CoalescingUnit(coalParam))
      coal.cpuNode :=* TLWidthWidget(4) :=* emulator.node
      coal.aggregateNode
    }
  }

  masterNode :=* coalescerNode

  override lazy val module = new EmulatorTileModuleImp(this)
}
|
||||||
|
|
||||||
|
/** Module implementation of [[EmulatorTile]]: reports the emulator's
  * `finished` output as the tile's cease signal.
  */
class EmulatorTileModuleImp(outer: EmulatorTile)
    extends BaseTileModuleImp(outer) {
  outer.reportCease(
    Some(outer.emulator.module.io.finished)
  )
}
|
||||||
@@ -13,6 +13,7 @@ import freechips.rocketchip.tile._
|
|||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||||
|
import radiance.core.{SIMTCoreKey}
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
|
|
||||||
case class FuzzerTileParams(
|
case class FuzzerTileParams(
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import freechips.rocketchip.tilelink._
|
|||||||
import freechips.rocketchip.util._
|
import freechips.rocketchip.util._
|
||||||
import midas.targetutils.SynthesizePrintf
|
import midas.targetutils.SynthesizePrintf
|
||||||
import org.chipsalliance.cde.config._
|
import org.chipsalliance.cde.config._
|
||||||
|
import radiance.core._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user