incorporate vortex2
This commit is contained in:
Submodule src/main/resources/vsrc/vortex updated: 5ac0299f9c...62171c0788
@@ -16,15 +16,18 @@ case class VortexL1Config(
|
|||||||
coreTagWidth: Int,
|
coreTagWidth: Int,
|
||||||
writeInfoReqQSize: Int,
|
writeInfoReqQSize: Int,
|
||||||
mshrSize: Int,
|
mshrSize: Int,
|
||||||
l2ReqSourceGenSize: Int,
|
memSideSourceIds: Int,
|
||||||
uncachedAddrSets: Seq[AddressSet],
|
uncachedAddrSets: Seq[AddressSet]
|
||||||
icacheInstAddrSets: Seq[AddressSet]
|
|
||||||
) {
|
) {
|
||||||
def coreTagPlusSizeWidth: Int = {
|
def coreTagPlusSizeWidth: Int = {
|
||||||
log2Ceil(wordSize) + coreTagWidth
|
log2Ceil(wordSize) + coreTagWidth
|
||||||
}
|
}
|
||||||
|
// NOTE: This assertion depends on the fact that the Vortex cache is
|
||||||
|
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
||||||
|
// memory-side requests. Otherwise, it will append bank id to the tag as
|
||||||
|
// well and break this requirement.
|
||||||
require(
|
require(
|
||||||
mshrSize == l2ReqSourceGenSize,
|
mshrSize == memSideSourceIds,
|
||||||
"MSHR size must match the number of sourceIds to downstream."
|
"MSHR size must match the number of sourceIds to downstream."
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -37,18 +40,15 @@ object defaultVortexL1Config
|
|||||||
coreTagWidth = 8,
|
coreTagWidth = 8,
|
||||||
writeInfoReqQSize = 16,
|
writeInfoReqQSize = 16,
|
||||||
mshrSize = 8,
|
mshrSize = 8,
|
||||||
l2ReqSourceGenSize = 8,
|
memSideSourceIds = 8,
|
||||||
uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffL)),
|
// Don't cache CLINT region to ensure coherent access
|
||||||
icacheInstAddrSets = Seq(AddressSet(0x80000000L, 0xfffffffL))
|
uncachedAddrSets = Seq(AddressSet(0x2000000L, 0xffffL))
|
||||||
)
|
)
|
||||||
|
|
||||||
class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters)
|
class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters)
|
||||||
extends LazyModule {
|
extends LazyModule {
|
||||||
// icache bank
|
val banks = Seq.tabulate(config.numBanks) { bankId =>
|
||||||
val icache_bank = LazyModule(new VortexBank(config, 0, isICache = true))
|
// helps with name mangling in Verilog
|
||||||
|
|
||||||
// dcache banks
|
|
||||||
val dcache_banks = Seq.tabulate(config.numBanks) { bankId =>
|
|
||||||
val bank = LazyModule(new VortexBank(config, bankId))
|
val bank = LazyModule(new VortexBank(config, bankId))
|
||||||
bank
|
bank
|
||||||
}
|
}
|
||||||
@@ -61,15 +61,13 @@ class VortexL1Cache(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
// core-side crossbar that arbitrates core requests to banks
|
// core-side crossbar that arbitrates core requests to banks
|
||||||
protected val bankXbar = LazyModule(new TLXbar)
|
protected val bankXbar = LazyModule(new TLXbar)
|
||||||
bankXbar.node :=* coresideNode
|
bankXbar.node :=* coresideNode
|
||||||
dcache_banks.foreach { _.coalToVxCacheNode :=* bankXbar.node }
|
banks.foreach { _.coresideNode :=* bankXbar.node }
|
||||||
passThrough.coalToVxCacheNode :=* bankXbar.node
|
passThrough.coresideNode :=* bankXbar.node
|
||||||
icache_bank.coalToVxCacheNode :=* bankXbar.node
|
|
||||||
|
|
||||||
// master node that exposes to and drives the downstream
|
// master node that exposes to and drives the downstream
|
||||||
val masterNode = TLIdentityNode()
|
val masterNode = TLIdentityNode()
|
||||||
dcache_banks.foreach { masterNode := _.vxCacheToL2Node }
|
banks.foreach { masterNode := _.vxCacheToL2Node }
|
||||||
masterNode := passThrough.vxCacheToL2Node
|
masterNode := passThrough.vxCacheToL2Node
|
||||||
masterNode := icache_bank.vxCacheToL2Node
|
|
||||||
|
|
||||||
lazy val module = new LazyModuleImp(this)
|
lazy val module = new LazyModuleImp(this)
|
||||||
}
|
}
|
||||||
@@ -101,7 +99,12 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
clients = Seq(
|
clients = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "VortexBank",
|
name = "VortexBank",
|
||||||
sourceId = IdRange(0, 1 << (log2Ceil(config.l2ReqSourceGenSize) + 5)),
|
sourceId = IdRange(
|
||||||
|
0,
|
||||||
|
1 << (log2Ceil(
|
||||||
|
config.memSideSourceIds
|
||||||
|
) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ )
|
||||||
|
),
|
||||||
supportsProbe = TransferSizes(1, config.wordSize),
|
supportsProbe = TransferSizes(1, config.wordSize),
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.wordSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||||
@@ -111,14 +114,14 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
val coalToVxCacheNode = TLManagerNode(managerParam)
|
val coresideNode = TLManagerNode(managerParam)
|
||||||
val vxCacheFetchNode = TLClientNode(clientParam)
|
val vxCacheFetchNode = TLClientNode(clientParam)
|
||||||
val vxCacheToL2Node = TLIdentityNode()
|
val vxCacheToL2Node = TLIdentityNode()
|
||||||
vxCacheToL2Node := TLWidthWidget(config.cacheLineSize) := vxCacheFetchNode
|
vxCacheToL2Node := TLWidthWidget(config.cacheLineSize) := vxCacheFetchNode
|
||||||
|
|
||||||
// the implementation to make everything a pass through
|
// passthrough logic
|
||||||
lazy val module = new LazyModuleImp(this) {
|
lazy val module = new LazyModuleImp(this) {
|
||||||
val (upstream, _) = coalToVxCacheNode.in(0)
|
val (upstream, _) = coresideNode.in(0)
|
||||||
val (downstream, _) = vxCacheFetchNode.out(0)
|
val (downstream, _) = vxCacheFetchNode.out(0)
|
||||||
|
|
||||||
downstream.a <> upstream.a
|
downstream.a <> upstream.a
|
||||||
@@ -129,28 +132,22 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
|||||||
class VortexBank(
|
class VortexBank(
|
||||||
config: VortexL1Config,
|
config: VortexL1Config,
|
||||||
bankId: Int,
|
bankId: Int,
|
||||||
isICache: Boolean = false
|
|
||||||
)(implicit p: Parameters)
|
)(implicit p: Parameters)
|
||||||
extends LazyModule {
|
extends LazyModule {
|
||||||
// Generate AddressSet by excluding Addr we don't want
|
// Generate AddressSet by excluding Addr we don't want
|
||||||
def generateAddressSets(): Seq[AddressSet] = {
|
def generateAddressSets(): Seq[AddressSet] = {
|
||||||
if (isICache) {
|
// suppose we have 4 banks
|
||||||
config.icacheInstAddrSets
|
// base for bank 1: ...000000|01|0000
|
||||||
// Seq(AddressSet(0x00000000L, 0xFFFFFFFFL))
|
// mask for bank 1: ...111111|00|1111
|
||||||
} else {
|
val base = 0x00000000L | (bankId * config.wordSize)
|
||||||
// suppose we have 4 banks
|
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize)
|
||||||
// base for bank 1: ...000000|01|0000
|
|
||||||
// mask for bank 1: ...111111|00|1111
|
|
||||||
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize)
|
|
||||||
val base = 0x00000000L | (bankId * config.wordSize)
|
|
||||||
|
|
||||||
val excludeSets = (config.uncachedAddrSets ++ config.icacheInstAddrSets)
|
val excludeSets = config.uncachedAddrSets
|
||||||
var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask))
|
var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask))
|
||||||
for (excludeSet <- excludeSets) {
|
for (excludeSet <- excludeSets) {
|
||||||
remainingSets = remainingSets.flatMap(_.subtract(excludeSet))
|
remainingSets = remainingSets.flatMap(_.subtract(excludeSet))
|
||||||
}
|
|
||||||
remainingSets
|
|
||||||
}
|
}
|
||||||
|
remainingSets
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slave node to upstream
|
// Slave node to upstream
|
||||||
@@ -177,7 +174,7 @@ class VortexBank(
|
|||||||
clients = Seq(
|
clients = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "VortexBank",
|
name = "VortexBank",
|
||||||
sourceId = IdRange(0, config.l2ReqSourceGenSize),
|
sourceId = IdRange(0, config.memSideSourceIds),
|
||||||
supportsProbe = TransferSizes(1, config.wordSize),
|
supportsProbe = TransferSizes(1, config.wordSize),
|
||||||
supportsGet = TransferSizes(1, config.wordSize),
|
supportsGet = TransferSizes(1, config.wordSize),
|
||||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||||
@@ -187,7 +184,8 @@ class VortexBank(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
val coalToVxCacheNode = TLManagerNode(managerParam)
|
// Core -> VxCache
|
||||||
|
val coresideNode = TLManagerNode(managerParam)
|
||||||
val vxCacheToL2Node = TLIdentityNode()
|
val vxCacheToL2Node = TLIdentityNode()
|
||||||
val vxCacheFetchNode = TLClientNode(clientParam)
|
val vxCacheFetchNode = TLClientNode(clientParam)
|
||||||
|
|
||||||
@@ -203,7 +201,7 @@ class VortexBankImp(
|
|||||||
config: VortexL1Config
|
config: VortexL1Config
|
||||||
) extends LazyModuleImp(outer) {
|
) extends LazyModuleImp(outer) {
|
||||||
val vxCache = Module(
|
val vxCache = Module(
|
||||||
new VX_cache(
|
new VX_cache_top(
|
||||||
WORD_SIZE = config.wordSize,
|
WORD_SIZE = config.wordSize,
|
||||||
CACHE_LINE_SIZE = config.cacheLineSize,
|
CACHE_LINE_SIZE = config.cacheLineSize,
|
||||||
CORE_TAG_WIDTH = config.coreTagPlusSizeWidth,
|
CORE_TAG_WIDTH = config.coreTagPlusSizeWidth,
|
||||||
@@ -250,7 +248,7 @@ class VortexBankImp(
|
|||||||
|
|
||||||
// Translate TL request from Coalescer to requests for VX_cache
|
// Translate TL request from Coalescer to requests for VX_cache
|
||||||
def TLReq2VXReq = {
|
def TLReq2VXReq = {
|
||||||
val (tlInFromCoal, _) = outer.coalToVxCacheNode.in.head
|
val (tlInFromCoal, _) = outer.coresideNode.in.head
|
||||||
|
|
||||||
// coal -> vxCache
|
// coal -> vxCache
|
||||||
tlInFromCoal.a.ready :=
|
tlInFromCoal.a.ready :=
|
||||||
@@ -327,12 +325,12 @@ class VortexBankImp(
|
|||||||
tlInFromCoal.d.bits.data := vxCache.io.core_rsp_data
|
tlInFromCoal.d.bits.data := vxCache.io.core_rsp_data
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since Vortex L1 is a write-through cache, it doesn't bookkeep writes and
|
// Since Vortex L1 is a write-through cache, it doesn't bookkeep writes in
|
||||||
// therefore doesn't allocate a new UUID for write requests. We use a
|
// its MSHR and therefore doesn't allocate a new tag id for write requests.
|
||||||
// separate source ID allocator to solve this.
|
// We use a separate source ID allocator to solve this.
|
||||||
val sourceGen = Module(
|
val sourceGen = Module(
|
||||||
new NewSourceGenerator(
|
new NewSourceGenerator(
|
||||||
log2Ceil(config.l2ReqSourceGenSize),
|
log2Ceil(config.memSideSourceIds),
|
||||||
metadata = Some(UInt(32.W)),
|
metadata = Some(UInt(32.W)),
|
||||||
ignoreInUse = false
|
ignoreInUse = false
|
||||||
)
|
)
|
||||||
@@ -389,70 +387,82 @@ class VortexBankImp(
|
|||||||
VXReq2TLReq
|
VXReq2TLReq
|
||||||
}
|
}
|
||||||
|
|
||||||
class VX_cache(
|
class VX_cache_top(
|
||||||
CACHE_ID: Int = 0, // seems to be only used for debug trace prints
|
// these values should match the default settings in Verilog
|
||||||
|
// TODO: INSTANCE_ID
|
||||||
CACHE_SIZE: Int = 16384 / 4, // <FIXME, divided by 4 for faster simulation
|
CACHE_SIZE: Int = 16384 / 4, // <FIXME, divided by 4 for faster simulation
|
||||||
CACHE_LINE_SIZE: Int = 16,
|
CACHE_LINE_SIZE: Int = 16,
|
||||||
NUM_PORTS: Int = 1,
|
NUM_WAYS: Int = 4,
|
||||||
WORD_SIZE: Int =
|
// for single-bank configuration, set NUM_REQS = 1 and instead set
|
||||||
16, // hack - one "word" is enough to satisfy all 4 warps after decoalescing.
|
// WORD_SIZE to something wider than 4
|
||||||
CREQ_SIZE: Int = 0,
|
WORD_SIZE: Int = 16,
|
||||||
CRSQ_SIZE: Int = 2,
|
CRSQ_SIZE: Int = 2,
|
||||||
MSHR_SIZE: Int = 8,
|
MSHR_SIZE: Int = 16,
|
||||||
MRSQ_SIZE: Int = 0,
|
MRSQ_SIZE: Int = 0,
|
||||||
MREQ_SIZE: Int = 4,
|
MREQ_SIZE: Int = 4,
|
||||||
WRITE_ENABLE: Int = 1,
|
WRITE_ENABLE: Int = 1,
|
||||||
|
UUID_WIDTH: Int = 0, // FIXME: should be different for debug
|
||||||
CORE_TAG_WIDTH: Int =
|
CORE_TAG_WIDTH: Int =
|
||||||
10, // source ID ranges from 0 to 1 << 10, we need to allocate upper bits to save size
|
16, // source ID ranges from 0 to 1 << 10, we need to allocate upper bits to save size
|
||||||
CORE_TAG_ID_BITS: Int =
|
CORE_OUT_REG : Int = 0,
|
||||||
5, // no idea what this is, just match it with default L1 dcache
|
MEM_OUT_REG : Int = 0,
|
||||||
BANK_ADDR_OFFSET: Int = 0,
|
|
||||||
NC_ENABLE: Int = 0, // NC_ENABLE=1 means the cache becomes a passthrough
|
|
||||||
WORD_ADDR_WIDTH: Int = 28, // 16 byte "word" = 4 bits
|
|
||||||
MEM_TAG_WIDTH: Int =
|
|
||||||
14, // Elaborated value is also completely different from (32 - log2Ceil(CACHE_LINE_SIZE)). This should match with sourceIds on client node associated with this cache
|
|
||||||
MEM_ADDR_WIDTH: Int = 28 // 16 byte cache line = 4 bits
|
|
||||||
) extends BlackBox(
|
) extends BlackBox(
|
||||||
Map(
|
Map(
|
||||||
"CACHE_ID" -> CACHE_ID,
|
// NOTE: NUM_REQS is analogous to SIMD width, whereas NUM_BANKS is the
|
||||||
"NUM_REQS" -> 1, // force to instantiate single bank by setting NUM_REQS to 1
|
// actual number of banks. VX_cache.sv instantiates VX_stream_xbar
|
||||||
|
// that arbitrates the higher NUM_REQS into NUM_BANKS. Since we do
|
||||||
|
// that logic ourselves using TL units, fix those params to 1 for the
|
||||||
|
// Verilog side.
|
||||||
|
"NUM_REQS" -> 1,
|
||||||
"CACHE_SIZE" -> CACHE_SIZE,
|
"CACHE_SIZE" -> CACHE_SIZE,
|
||||||
"CACHE_LINE_SIZE" -> CACHE_LINE_SIZE,
|
"LINE_SIZE" -> CACHE_LINE_SIZE,
|
||||||
"NUM_PORTS" -> NUM_PORTS,
|
// NUM_BANKS is set to 1 to treat a whole VX_cache_top instance as a
|
||||||
|
// single bank
|
||||||
|
"NUM_BANKS" -> 1,
|
||||||
|
"NUM_WAYS" -> NUM_WAYS,
|
||||||
"WORD_SIZE" -> WORD_SIZE,
|
"WORD_SIZE" -> WORD_SIZE,
|
||||||
"CREQ_SIZE" -> CREQ_SIZE,
|
|
||||||
"CRSQ_SIZE" -> CRSQ_SIZE,
|
"CRSQ_SIZE" -> CRSQ_SIZE,
|
||||||
"MSHR_SIZE" -> MSHR_SIZE,
|
"MSHR_SIZE" -> MSHR_SIZE,
|
||||||
"MRSQ_SIZE" -> MRSQ_SIZE,
|
"MRSQ_SIZE" -> MRSQ_SIZE,
|
||||||
"MREQ_SIZE" -> MREQ_SIZE,
|
"MREQ_SIZE" -> MREQ_SIZE,
|
||||||
"WRITE_ENABLE" -> WRITE_ENABLE,
|
"WRITE_ENABLE" -> WRITE_ENABLE,
|
||||||
"CORE_TAG_WIDTH" -> CORE_TAG_WIDTH,
|
"UUID_WIDTH" -> UUID_WIDTH,
|
||||||
"CORE_TAG_ID_BITS" -> CORE_TAG_ID_BITS,
|
"TAG_WIDTH" -> CORE_TAG_WIDTH,
|
||||||
"MEM_TAG_WIDTH" -> MEM_TAG_WIDTH,
|
"CORE_OUT_REG" -> CORE_OUT_REG,
|
||||||
"BANK_ADDR_OFFSET" -> BANK_ADDR_OFFSET,
|
"MEM_OUT_REG" -> MEM_OUT_REG,
|
||||||
"NC_ENABLE" -> NC_ENABLE
|
// Although VX_cache_top exposes it as a parameter, MEM_TAG_WIDTH is
|
||||||
|
// not really configurable -- it is set to be a concatenation of the
|
||||||
|
// MSHR id and cache bank id. Instead of trying to configure it from
|
||||||
|
// Chisel side, we try to figure out its value that's elaborated in the
|
||||||
|
// Verilog side and configure the Chisel io width correspondingly.
|
||||||
|
// "MEM_TAG_WIDTH" -> MEM_TAG_WIDTH
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
with HasBlackBoxResource {
|
with HasBlackBoxResource {
|
||||||
|
|
||||||
|
def memTagWidth(mshrSize: Int, numBanks: Int): Int =
|
||||||
|
log2Ceil(mshrSize) + log2Ceil(numBanks)
|
||||||
|
val MEM_TAG_WIDTH = memTagWidth(MSHR_SIZE, 1/* NUM_BANKS */)
|
||||||
|
|
||||||
|
// This logic is fixed in VX_cache_define.vh
|
||||||
|
val memAddrWidth = 32 // FIXME hardcoded
|
||||||
|
val cacheWordAddrWidth = 32 - log2Ceil(WORD_SIZE)
|
||||||
|
val cacheMemAddrWidth = 32 - log2Ceil(CACHE_LINE_SIZE)
|
||||||
|
|
||||||
val io = IO(new Bundle {
|
val io = IO(new Bundle {
|
||||||
val clk = Input(Clock())
|
val clk = Input(Clock())
|
||||||
val reset = Input(Reset())
|
val reset = Input(Reset())
|
||||||
|
|
||||||
// We should be able to turn the following into TileLink easily
|
|
||||||
|
|
||||||
// CACHE <> CORE
|
// CACHE <> CORE
|
||||||
val core_req_valid = Input(Bool())
|
val core_req_valid = Input(Bool())
|
||||||
val core_req_rw = Input(Bool())
|
val core_req_rw = Input(Bool())
|
||||||
val core_req_addr = Input(UInt(WORD_ADDR_WIDTH.W))
|
|
||||||
val core_req_byteen = Input(UInt(WORD_SIZE.W))
|
val core_req_byteen = Input(UInt(WORD_SIZE.W))
|
||||||
|
val core_req_addr = Input(UInt(cacheWordAddrWidth.W))
|
||||||
val core_req_data = Input(UInt((WORD_SIZE * 8).W))
|
val core_req_data = Input(UInt((WORD_SIZE * 8).W))
|
||||||
val core_req_tag = Input(UInt(CORE_TAG_WIDTH.W))
|
val core_req_tag = Input(UInt(CORE_TAG_WIDTH.W))
|
||||||
val core_req_ready = Output(Bool())
|
val core_req_ready = Output(Bool())
|
||||||
|
|
||||||
val core_rsp_valid = Output(Bool()) // 1 bit wide
|
val core_rsp_valid = Output(Bool()) // 1 bit wide
|
||||||
val core_rsp_tmask =
|
|
||||||
Output(Bool()) // 1 bit wide, probably can ignore (check waveform)
|
|
||||||
val core_rsp_data = Output(UInt((WORD_SIZE * 8).W))
|
val core_rsp_data = Output(UInt((WORD_SIZE * 8).W))
|
||||||
val core_rsp_tag = Output(UInt(CORE_TAG_WIDTH.W))
|
val core_rsp_tag = Output(UInt(CORE_TAG_WIDTH.W))
|
||||||
val core_rsp_ready = Input(Bool())
|
val core_rsp_ready = Input(Bool())
|
||||||
@@ -461,7 +471,7 @@ class VX_cache(
|
|||||||
val mem_req_valid = Output(Bool())
|
val mem_req_valid = Output(Bool())
|
||||||
val mem_req_rw = Output(Bool())
|
val mem_req_rw = Output(Bool())
|
||||||
val mem_req_byteen = Output(UInt(CACHE_LINE_SIZE.W))
|
val mem_req_byteen = Output(UInt(CACHE_LINE_SIZE.W))
|
||||||
val mem_req_addr = Output(UInt(MEM_ADDR_WIDTH.W))
|
val mem_req_addr = Output(UInt(cacheMemAddrWidth.W))
|
||||||
val mem_req_data = Output(UInt((CACHE_LINE_SIZE * 8).W))
|
val mem_req_data = Output(UInt((CACHE_LINE_SIZE * 8).W))
|
||||||
val mem_req_tag = Output(UInt(MEM_TAG_WIDTH.W))
|
val mem_req_tag = Output(UInt(MEM_TAG_WIDTH.W))
|
||||||
val mem_req_ready = Input(Bool())
|
val mem_req_ready = Input(Bool())
|
||||||
@@ -472,133 +482,15 @@ class VX_cache(
|
|||||||
val mem_rsp_ready = Output(Bool())
|
val mem_rsp_ready = Output(Bool())
|
||||||
})
|
})
|
||||||
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv")
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bank.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_issue.sv")
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_data.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh")
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv")
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_init.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv")
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_mshr.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_scope.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_decode.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_config.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_define.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_execute.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_platform.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_commit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/VX_config.h")
|
|
||||||
addResource("/vsrc/vortex/sim/common/rvfloats.h")
|
|
||||||
addResource("/vsrc/vortex/sim/common/rvfloats.cpp")
|
|
||||||
addResource("/csrc/softfloat/include/internals.h")
|
|
||||||
addResource("/csrc/softfloat/include/primitives.h")
|
|
||||||
addResource("/csrc/softfloat/include/primitiveTypes.h")
|
|
||||||
addResource("/csrc/softfloat/include/softfloat.h")
|
|
||||||
addResource("/csrc/softfloat/include/softfloat_types.h")
|
|
||||||
addResource("/csrc/softfloat/RISCV/specialize.h")
|
|
||||||
addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp")
|
|
||||||
addResource("/vsrc/vortex/hw/dpi/float_dpi.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp")
|
|
||||||
addResource("/vsrc/vortex/hw/dpi/util_dpi.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv")
|
|
||||||
// addResource("/vsrc/vortex/hw/rtl/cache/VX_shared_mem.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_core_rsp_merge.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_tag_access.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_core_req_bank_sel.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_bank.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_data_access.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_flush_ctrl.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_nc_bypass.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_miss_resrv.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv")
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_top.sv")
|
||||||
}
|
}
|
||||||
|
|
||||||
// <FIXME> Delete the following NewSourceGenerator when merging with origin/graphics
|
// <FIXME> Delete the following NewSourceGenerator when merging with origin/graphics
|
||||||
@@ -82,7 +82,11 @@ class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
class WithCoalescer(nNewSrcIds: Int = 8) extends Config((site, _, up) => {
|
// When `enable` is false, we still elaborate Coalescer, but it acts as a
|
||||||
|
// pass-through logic that always outputs un-coalesced requests. This is
|
||||||
|
// useful for when we want to keep the generated wire and net names the same
|
||||||
|
// to e.g. compare waveforms.
|
||||||
|
class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => {
|
||||||
case CoalescerKey => {
|
case CoalescerKey => {
|
||||||
val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match {
|
val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match {
|
||||||
case Some(param) => (param.nLanes, param.nSrcIds)
|
case Some(param) => (param.nLanes, param.nSrcIds)
|
||||||
@@ -104,6 +108,7 @@ class WithCoalescer(nNewSrcIds: Int = 8) extends Config((site, _, up) => {
|
|||||||
|
|
||||||
// Note: this config chooses a single-sized coalescing logic by default.
|
// Note: this config chooses a single-sized coalescing logic by default.
|
||||||
Some(DefaultCoalescerConfig.copy(
|
Some(DefaultCoalescerConfig.copy(
|
||||||
|
enable = enable,
|
||||||
numLanes = nLanes,
|
numLanes = nLanes,
|
||||||
numOldSrcIds = numOldSrcIds,
|
numOldSrcIds = numOldSrcIds,
|
||||||
numNewSrcIds = nNewSrcIds,
|
numNewSrcIds = nNewSrcIds,
|
||||||
@@ -150,4 +155,4 @@ class WithNCustomSmallRocketCores(
|
|||||||
crossing
|
crossing
|
||||||
)) ++ prev
|
)) ++ prev
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -10,26 +10,26 @@ import org.chipsalliance.cde.config.Parameters
|
|||||||
import freechips.rocketchip.tile._
|
import freechips.rocketchip.tile._
|
||||||
|
|
||||||
class VortexBundleA(
|
class VortexBundleA(
|
||||||
sourceWidth: Int,
|
tagWidth: Int,
|
||||||
dataWidth: Int
|
dataWidth: Int
|
||||||
) extends Bundle {
|
) extends Bundle {
|
||||||
assert(dataWidth % 8 == 0)
|
assert(dataWidth % 8 == 0)
|
||||||
val opcode = UInt(3.W) // FIXME: hardcoded
|
val opcode = UInt(3.W) // FIXME: hardcoded
|
||||||
val size = UInt(4.W) // FIXME: hardcoded
|
val size = UInt(4.W) // FIXME: hardcoded
|
||||||
val source = UInt(sourceWidth.W) // FIXME: hardcoded
|
val source = UInt(tagWidth.W) // FIXME: hardcoded
|
||||||
val address = UInt(32.W) // FIXME: hardcoded
|
val address = UInt(32.W) // FIXME: hardcoded
|
||||||
val mask = UInt((dataWidth / 8).W) // FIXME: hardcoded
|
val mask = UInt((dataWidth / 8).W) // FIXME: hardcoded
|
||||||
val data = UInt(dataWidth.W) // FIXME: hardcoded
|
val data = UInt(dataWidth.W) // FIXME: hardcoded
|
||||||
}
|
}
|
||||||
|
|
||||||
class VortexBundleD(
|
class VortexBundleD(
|
||||||
sourceWidth: Int,
|
tagWidth: Int,
|
||||||
dataWidth: Int
|
dataWidth: Int
|
||||||
) extends Bundle {
|
) extends Bundle {
|
||||||
assert(dataWidth % 8 == 0)
|
assert(dataWidth % 8 == 0)
|
||||||
val opcode = UInt(3.W) // FIXME: hardcoded
|
val opcode = UInt(3.W) // FIXME: hardcoded
|
||||||
val size = UInt(4.W) // FIXME: hardcoded
|
val size = UInt(4.W) // FIXME: hardcoded
|
||||||
val source = UInt(sourceWidth.W) // FIXME: hardcoded
|
val source = UInt(tagWidth.W) // FIXME: hardcoded
|
||||||
val data = UInt(dataWidth.W) // FIXME: hardcoded
|
val data = UInt(dataWidth.W) // FIXME: hardcoded
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -42,20 +42,60 @@ class VortexBundle(tile: VortexTile)(implicit p: Parameters) extends CoreBundle
|
|||||||
|
|
||||||
// conditionally instantiate ports depending on whether we want to use VX_cache or not
|
// conditionally instantiate ports depending on whether we want to use VX_cache or not
|
||||||
val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle {
|
val imem = if (!tile.vortexParams.useVxCache) Some(Vec(1, new Bundle {
|
||||||
val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32))
|
val a = Decoupled(new VortexBundleA(tagWidth = tile.imemTagWidth, dataWidth = 32))
|
||||||
val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32)))
|
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.imemTagWidth, dataWidth = 32)))
|
||||||
})) else None
|
})) else None
|
||||||
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
val dmem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
||||||
val a = Decoupled(new VortexBundleA(sourceWidth = 10, dataWidth = 32))
|
// val a = Decoupled(new VortexBundleA(tagWidth = tile.dmemTagWidth, dataWidth = 32))
|
||||||
val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 10, dataWidth = 32)))
|
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.dmemTagWidth, dataWidth = 32)))
|
||||||
|
})) else None
|
||||||
|
val smem = if (!tile.vortexParams.useVxCache) Some(Vec(tile.numLanes, new Bundle {
|
||||||
|
// val a = Decoupled(new VortexBundleA(tagWidth = tile.smemTagWidth, dataWidth = 32))
|
||||||
|
// val d = Flipped(Decoupled(new VortexBundleD(tagWidth = tile.smemTagWidth, dataWidth = 32)))
|
||||||
})) else None
|
})) else None
|
||||||
val mem = if (tile.vortexParams.useVxCache) Some(new Bundle {
|
val mem = if (tile.vortexParams.useVxCache) Some(new Bundle {
|
||||||
val a = Decoupled(new VortexBundleA(sourceWidth = 15, dataWidth = 128))
|
val a = Decoupled(new VortexBundleA(tagWidth = 15, dataWidth = 128))
|
||||||
val d = Flipped(Decoupled(new VortexBundleD(sourceWidth = 15, dataWidth = 128)))
|
val d = Flipped(Decoupled(new VortexBundleD(tagWidth = 15, dataWidth = 128)))
|
||||||
// val a = tile.memNode.out.head._1.a.cloneType
|
// val a = tile.memNode.out.head._1.a.cloneType
|
||||||
// val d = Flipped(tile.memNode.out.head._1.d.cloneType)
|
// val d = Flipped(tile.memNode.out.head._1.d.cloneType)
|
||||||
}) else None
|
}) else None
|
||||||
|
|
||||||
|
// Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
|
||||||
|
// everything needs to be 1-D flattened UInt with their widths configurable by numLanes.
|
||||||
|
//
|
||||||
|
// FIXME: hardcoded bitwidths
|
||||||
|
val dmem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
||||||
|
val dmem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
||||||
|
val dmem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
||||||
|
val dmem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
||||||
|
val dmem_a_bits_source = Output(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
||||||
|
val dmem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
||||||
|
val dmem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
||||||
|
val dmem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
||||||
|
|
||||||
|
val dmem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
||||||
|
val dmem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
||||||
|
val dmem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
||||||
|
val dmem_d_bits_source = Input(UInt((tile.numLanes * tile.dmemTagWidth).W))
|
||||||
|
val dmem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
||||||
|
val dmem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
||||||
|
|
||||||
|
val smem_a_ready = Input(UInt((tile.numLanes * 1).W))
|
||||||
|
val smem_a_valid = Output(UInt((tile.numLanes * 1).W))
|
||||||
|
val smem_a_bits_opcode = Output(UInt((tile.numLanes * 3).W))
|
||||||
|
val smem_a_bits_size = Output(UInt((tile.numLanes * 4).W))
|
||||||
|
val smem_a_bits_source = Output(UInt((tile.numLanes * tile.smemTagWidth).W))
|
||||||
|
val smem_a_bits_address = Output(UInt((tile.numLanes * 32).W))
|
||||||
|
val smem_a_bits_mask = Output(UInt((tile.numLanes * 4).W))
|
||||||
|
val smem_a_bits_data = Output(UInt((tile.numLanes * 32).W))
|
||||||
|
|
||||||
|
val smem_d_valid = Input(UInt((tile.numLanes * 1).W))
|
||||||
|
val smem_d_bits_opcode = Input(UInt((tile.numLanes * 3).W))
|
||||||
|
val smem_d_bits_size = Input(UInt((tile.numLanes * 4).W))
|
||||||
|
val smem_d_bits_source = Input(UInt((tile.numLanes * tile.smemTagWidth).W))
|
||||||
|
val smem_d_bits_data = Input(UInt((tile.numLanes * 32).W))
|
||||||
|
val smem_d_ready = Output(UInt((tile.numLanes * 1).W))
|
||||||
|
|
||||||
// val fpu = Flipped(new FPUCoreIO())
|
// val fpu = Flipped(new FPUCoreIO())
|
||||||
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
//val rocc = Flipped(new RoCCCoreIO(nTotalRoCCCSRs))
|
||||||
//val trace = Output(new TraceBundle)
|
//val trace = Output(new TraceBundle)
|
||||||
@@ -70,7 +110,12 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
// Each Vortex core gets tied-off hartId of 0, 1, 2, 3, ...
|
// Each Vortex core gets tied-off hartId of 0, 1, 2, 3, ...
|
||||||
// The actual MHARTID read by the program is different by warp, not core;
|
// The actual MHARTID read by the program is different by warp, not core;
|
||||||
// see VX_csr_data that implements the read logic for CSR_MHARTID/GWID.
|
// see VX_csr_data that implements the read logic for CSR_MHARTID/GWID.
|
||||||
Map("CORE_ID" -> tile.tileParams.hartId)
|
Map(
|
||||||
|
"CORE_ID" -> tile.tileParams.hartId,
|
||||||
|
// TODO: can we get this as a parameter?
|
||||||
|
"BOOTROM_HANG100" -> 0x10100,
|
||||||
|
"NUM_THREADS" -> tile.numLanes
|
||||||
|
)
|
||||||
)
|
)
|
||||||
with HasBlackBoxResource {
|
with HasBlackBoxResource {
|
||||||
// addResource("/vsrc/vortex/hw/unit_tests/generic_queue/testbench.v")
|
// addResource("/vsrc/vortex/hw/unit_tests/generic_queue/testbench.v")
|
||||||
@@ -90,87 +135,153 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
// addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v")
|
// addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/rf2_32x128_wm1.v")
|
||||||
// addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v")
|
// addResource("/vsrc/vortex/hw/syn/synopsys/models/memory/cln28hpc/rf2_32x128_wm1/vsim/rf2_32x128_wm1_tb.v")
|
||||||
// addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v")
|
// addResource("/vsrc/vortex/hw/syn/modelsim/vortex_tb.v")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_dispatch.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_issue.sv")
|
|
||||||
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh")
|
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/VX_gpu_pkg.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_warp_sched.sv")
|
|
||||||
// addResource("/vsrc/vortex/hw/rtl/Vortex.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_scope.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_fpu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_scoreboard.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_writeback.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_muldiv.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_decode.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_ibuffer.sv")
|
|
||||||
// addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv")
|
// addResource("/vsrc/vortex/hw/rtl/VX_cluster.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_icache_stage.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpu_unit.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_trace_instr.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpu_types.vh")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_config.vh")
|
addResource("/vsrc/vortex/hw/rtl/VX_config.vh")
|
||||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_mux.sv")
|
addResource("/vsrc/vortex/hw/VX_config.h")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_define.vh")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_platform.vh")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_scope.vh")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv")
|
// addResource("/vsrc/vortex/hw/rtl/VX_socket.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_types.vh")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv")
|
// addResource("/vsrc/vortex/hw/rtl/Vortex.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv")
|
|
||||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_mux.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_alu_unit.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_commit.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv")
|
addResource("/vsrc/vortex/hw/rtl/core/VX_core.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv")
|
// These are top modules used for unittests
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/core/VX_core_top.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_top.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_cluster_top.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_csr_data.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_csr_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dcr_data.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_decode.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_dispatch_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_execute.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_fetch.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_gather_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_ibuffer.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_int_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_ipdom_stack.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_issue.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_lsu_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_muldiv_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_operands.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_pending_instr.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_schedule.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_scoreboard.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_sfu_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_smem_unit.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_split_join.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_trace.vh")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/core/VX_wctl_unit.sv")
|
||||||
|
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bank.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_bypass.sv")
|
||||||
|
// need to disable this if VX_cache_cluster_top is disabled, otherwise causes
|
||||||
|
// unconnected port error
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_cluster.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_data.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_define.vh")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_init.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_mshr.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_tags.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/cache/VX_cache_wrap.sv")
|
||||||
|
|
||||||
|
// gbar is only used in the socket/cluster hierarchy
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_arb.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_bus_if.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/mem/VX_gbar_unit.sv")
|
||||||
|
// mem_arb is used in VX_socket or VX_cache_cluster
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_arb.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_bus_if.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/mem/VX_mem_perf_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/mem/VX_shared_mem.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/mem/VX_smem_switch.sv")
|
||||||
|
|
||||||
|
// tex_unit missing in Vortex 2.0
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sat.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_stride.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_lerp.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_addr.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_mem.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_format.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_sampler.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_unit.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_define.vh")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/tex_unit/VX_tex_wrap.sv")
|
||||||
|
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_allocator.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/libs/VX_avs_adapter.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_insert.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_bits_remove.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fixed_arbiter.sv")
|
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_bypass_buffer.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_cyclic_arbiter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv")
|
|
||||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_divider.sv")
|
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_divider.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_dp_ram.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_axi_adapter.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_adapter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_elastic_buffer.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arbiter.sv")
|
|
||||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_bypass_buffer.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_demux.sv")
|
|
||||||
|
|
||||||
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_index_queue.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_fair_arbiter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_define.vh")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_fifo_queue.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_csr_data.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_find_first.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_cache_arb.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_generic_arbiter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_ipdom_stack.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_index_buffer.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_gpr_stage.sv")
|
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_index_queue.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_execute.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_lzc.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_fetch.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_matrix_arbiter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_alu_unit.sv")
|
// addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_adapter.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_platform.vh")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_rsp_sel.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_commit.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_mem_scheduler.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_multiplier.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_pipeline.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_mux.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_lsu_unit.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_encoder.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_csr_unit.sv")
|
// unused addResource("/vsrc/vortex/hw/rtl/libs/VX_onehot_mux.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/Vortex_axi.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_pending_size.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_div.sv")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_pipe_register.sv")
|
||||||
addResource("/vsrc/vortex/hw/VX_config.h")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_popcount.sv")
|
||||||
addResource("/vsrc/vortex/sim/common/rvfloats.h")
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_arbiter.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_priority_encoder.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_reduce.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_reset_relay.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_rr_arbiter.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_scan.sv")
|
||||||
|
// These VX_scope_* seem to be used for FPGA debugging; if we leave them in,
|
||||||
|
// they cause elaboration errors
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/libs/VX_scope_switch.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/libs/VX_scope_tap.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_div.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_serial_mul.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_shift_register.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_skid_buffer.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_sp_ram.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_arb.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_switch.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/libs/VX_stream_xbar.sv")
|
||||||
|
|
||||||
|
addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp")
|
||||||
|
addResource("/vsrc/vortex/hw/dpi/float_dpi.vh")
|
||||||
|
addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp")
|
||||||
|
addResource("/vsrc/vortex/hw/dpi/util_dpi.vh")
|
||||||
|
// needed dpi cpp files
|
||||||
|
addResource("/vsrc/vortex/sim/common/bitmanip.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/mem.cpp")
|
||||||
|
addResource("/vsrc/vortex/sim/common/mem.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/mempool.h")
|
||||||
addResource("/vsrc/vortex/sim/common/rvfloats.cpp")
|
addResource("/vsrc/vortex/sim/common/rvfloats.cpp")
|
||||||
|
addResource("/vsrc/vortex/sim/common/rvfloats.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/simobject.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/stringutil.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/util.cpp")
|
||||||
|
addResource("/vsrc/vortex/sim/common/util.h")
|
||||||
|
addResource("/vsrc/vortex/sim/common/uuid_gen.h")
|
||||||
|
|
||||||
// addResource("/csrc/softfloat_archive.a")
|
// addResource("/csrc/softfloat_archive.a")
|
||||||
addResource("/csrc/softfloat/include/internals.h")
|
addResource("/csrc/softfloat/include/internals.h")
|
||||||
addResource("/csrc/softfloat/include/primitives.h")
|
addResource("/csrc/softfloat/include/primitives.h")
|
||||||
@@ -178,11 +289,22 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
addResource("/csrc/softfloat/include/softfloat.h")
|
addResource("/csrc/softfloat/include/softfloat.h")
|
||||||
addResource("/csrc/softfloat/include/softfloat_types.h")
|
addResource("/csrc/softfloat/include/softfloat_types.h")
|
||||||
addResource("/csrc/softfloat/RISCV/specialize.h")
|
addResource("/csrc/softfloat/RISCV/specialize.h")
|
||||||
addResource("/vsrc/vortex/hw/dpi/float_dpi.cpp")
|
|
||||||
addResource("/vsrc/vortex/hw/dpi/float_dpi.vh")
|
// Vortex 2.0: fp_cores/ renamed to fpu/
|
||||||
addResource("/vsrc/vortex/hw/dpi/util_dpi.cpp")
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_class.sv")
|
||||||
addResource("/vsrc/vortex/hw/dpi/util_dpi.vh")
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_cvt.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_dpi.sv")
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_define.vh")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_div.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_dpi.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_dsp.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fma.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_fpnew.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_ncomp.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_pkg.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_rounding.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_sqrt.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_to_csr_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/fpu/VX_fpu_unit.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_rounding.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_rounding.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/dspba_delay_ver.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/dspba_delay_ver.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/acl_fsqrt.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/altera/stratix10/acl_fsqrt.sv")
|
||||||
@@ -195,46 +317,32 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_class.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_class.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpnew.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpnew.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_cvt.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_cvt.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_define.vh")
|
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_fma.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_fma.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_ncomp.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_ncomp.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpga.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_fpga.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fpu_types.vh")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_sqrt.sv")
|
// addResource("/vsrc/vortex/hw/rtl/fp_cores/VX_fp_sqrt.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_join_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_cache_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_memsys_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_pipeline_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_gpr_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_cmt_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_to_alu_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ifetch_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_alu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_csr_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_branch_ctl_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcache_rsp_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_csr_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_icache_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_lsu_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_wstall_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_rsp_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_req_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_commit_sched_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dcr_bus_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_decode_sched_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_dispatch_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_execute_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_ibuffer_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_operands_if.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/interfaces/VX_pipeline_perf_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sched_csr_if.sv")
|
||||||
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_schedule_if.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sfu_csr_if.sv")
|
||||||
|
// addResource("/vsrc/vortex/hw/rtl/interfaces/VX_sfu_perf_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_warp_ctl_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_tex_rsp_if.sv")
|
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_writeback_if.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fetch_to_csr_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_perf_tex_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_mem_req_if.sv")
|
|
||||||
addResource("/vsrc/vortex/hw/rtl/interfaces/VX_fpu_req_if.sv")
|
|
||||||
// addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.sv")
|
// addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/afu/ccip_std_afu.sv")
|
// addResource("/vsrc/vortex/hw/rtl/afu/ccip_std_afu.sv")
|
||||||
// addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.vh")
|
// addResource("/vsrc/vortex/hw/rtl/afu/vortex_afu.vh")
|
||||||
@@ -262,10 +370,9 @@ class Vortex(tile: VortexTile)(implicit p: Parameters)
|
|||||||
addResource("/vsrc/vortex/hw/rtl/VX_core.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_core.sv")
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_core_wrapper.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_core_wrapper.sv")
|
||||||
} else {
|
} else {
|
||||||
addResource("/vsrc/vortex/hw/rtl/VX_pipeline_wrapper.sv")
|
addResource("/vsrc/vortex/hw/rtl/VX_core_wrapper.sv")
|
||||||
}
|
}
|
||||||
|
|
||||||
val nTotalRoCCCSRs = 0
|
val nTotalRoCCCSRs = 0
|
||||||
val coreBundle = new VortexBundle(tile)
|
val io = IO(new VortexBundle(tile))
|
||||||
val io = IO(coreBundle)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -187,6 +187,18 @@ class VortexTile private (
|
|||||||
"We recommend setting nSrcIds to at least 16."
|
"We recommend setting nSrcIds to at least 16."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
val smemSourceWidth = 4 // FIXME: hardcoded
|
||||||
|
|
||||||
|
// TODO: parametrize
|
||||||
|
val numWarps = 4
|
||||||
|
val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps))
|
||||||
|
val UUID_WIDTH = 44
|
||||||
|
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||||
|
val LSUQ_TAG_BITS = 4
|
||||||
|
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||||
|
// dmem and smem share the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||||
|
val smemTagWidth = dmemTagWidth
|
||||||
|
|
||||||
val imemNodes = Seq.tabulate(1) { i =>
|
val imemNodes = Seq.tabulate(1) { i =>
|
||||||
TLClientNode(
|
TLClientNode(
|
||||||
Seq(
|
Seq(
|
||||||
@@ -228,14 +240,37 @@ class VortexTile private (
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val smemNodes = Seq.tabulate(numLanes) { i =>
|
||||||
|
TLClientNode(
|
||||||
|
Seq(
|
||||||
|
TLMasterPortParameters.v1(
|
||||||
|
clients = Seq(
|
||||||
|
TLMasterParameters.v1(
|
||||||
|
sourceId = IdRange(0, 1 << smemSourceWidth),
|
||||||
|
name = s"Vortex Core ${vortexParams.hartId} SharedMem Lane $i",
|
||||||
|
requestFifo = true,
|
||||||
|
supportsProbe =
|
||||||
|
TransferSizes(1, lazyCoreParamsView.coreDataBytes),
|
||||||
|
supportsGet = TransferSizes(1, lazyCoreParamsView.coreDataBytes),
|
||||||
|
supportsPutFull =
|
||||||
|
TransferSizes(1, lazyCoreParamsView.coreDataBytes),
|
||||||
|
supportsPutPartial =
|
||||||
|
TransferSizes(1, lazyCoreParamsView.coreDataBytes)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// combine outgoing per-lane dmemNode into 1 identity node
|
// combine outgoing per-lane dmemNode into 1 identity node
|
||||||
//
|
//
|
||||||
// NOTE: We need TLWidthWidget here because there might be a data width
|
// NOTE: We need TLWidthWidget here because there might be a data width
|
||||||
// mismatch between Vortex's per-lane response and the system bus when we
|
// mismatch between Vortex's per-lane response and the system bus when we
|
||||||
// don't instantiate either L1 or the coalescer. This _should_ be optimized
|
// don't instantiate either L1 or the coalescer. This _should_ be optimized
|
||||||
// out when we instantiate coalescer which should handle data width conversion
|
// out when we instantiate either of them, which should handle data width conversion
|
||||||
// internally (which it does by... using TLWidthWidget), but probably not
|
// internally (which it does by... using TLWidthWidget).
|
||||||
// the cleanest way to do this.
|
|
||||||
val dmemAggregateNode = TLIdentityNode()
|
val dmemAggregateNode = TLIdentityNode()
|
||||||
dmemNodes.foreach { dmemAggregateNode := TLWidthWidget(4) := _ }
|
dmemNodes.foreach { dmemAggregateNode := TLWidthWidget(4) := _ }
|
||||||
|
|
||||||
@@ -262,7 +297,7 @@ class VortexTile private (
|
|||||||
val coalescerNode = p(CoalescerKey) match {
|
val coalescerNode = p(CoalescerKey) match {
|
||||||
case Some(coalescerParam) => {
|
case Some(coalescerParam) => {
|
||||||
val coal = LazyModule(
|
val coal = LazyModule(
|
||||||
new CoalescingUnit(coalescerParam.copy(enable = true))
|
new CoalescingUnit(coalescerParam)
|
||||||
)
|
)
|
||||||
coal.cpuNode :=* dmemAggregateNode
|
coal.cpuNode :=* dmemAggregateNode
|
||||||
coal.aggregateNode // N+1 lanes
|
coal.aggregateNode // N+1 lanes
|
||||||
@@ -271,37 +306,56 @@ class VortexTile private (
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Conditionally instantiate L1 cache
|
// Conditionally instantiate L1 cache
|
||||||
val l1Node = p(VortexL1Key) match {
|
val (icacheNode, dcacheNode): (TLNode, TLNode) = p(VortexL1Key) match {
|
||||||
case Some(vortexL1Config) => {
|
case Some(vortexL1Config) => {
|
||||||
println(
|
println(
|
||||||
s"============ Using Vortex L1 cache ================="
|
s"============ Using Vortex L1 cache ================="
|
||||||
)
|
)
|
||||||
require(
|
// require(
|
||||||
p(CoalescerKey).isDefined,
|
// p(CoalescerKey).isDefined,
|
||||||
"Vortex L1 configuration currently only works when coalescer is also enabled."
|
// "Vortex L1 configuration currently only works when coalescer is also enabled."
|
||||||
)
|
// )
|
||||||
|
|
||||||
val l1cache = LazyModule(new VortexL1Cache(vortexL1Config))
|
val icache = LazyModule(new VortexL1Cache(vortexL1Config))
|
||||||
// Connect L1 with imem_fetch_interface without XBar
|
val dcache = LazyModule(new VortexL1Cache(vortexL1Config))
|
||||||
// coalToVxCacheNode is a poor name; it really means the upstream side of vxBank, from which it takes input
|
// imemNodes.foreach { icache.coresideNode := TLWidthWidget(4) := _ }
|
||||||
// imemNodes.foreach { l1cache.icache_bank.coalToVxCacheNode := TLWidthWidget(4) := _ }
|
assert(imemNodes.length == 1) // FIXME
|
||||||
imemNodes.foreach { l1cache.coresideNode := TLWidthWidget(4) := _ }
|
icache.coresideNode := TLWidthWidget(4) := imemNodes(0)
|
||||||
l1cache.coresideNode :=* coalescerNode
|
// dmemNodes go through coalescerNode
|
||||||
l1cache.masterNode
|
dcache.coresideNode :=* coalescerNode
|
||||||
|
(icache.masterNode, dcache.masterNode)
|
||||||
}
|
}
|
||||||
case None => {
|
case None => {
|
||||||
// Regardless of using coalescer or not, if we're not using L1, imemNode
|
val imemWideNode = TLIdentityNode()
|
||||||
// goes directly to tile exit xbar
|
assert(imemNodes.length == 1) // FIXME
|
||||||
// FIXME: unnatural, have L1 just handle dmem
|
imemWideNode := TLWidthWidget(4) := imemNodes(0)
|
||||||
imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ }
|
(imemWideNode, coalescerNode)
|
||||||
coalescerNode
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Instantiate sharedmem banks
|
||||||
|
//
|
||||||
|
// Instantiate the same number of banks as there are lanes.
|
||||||
|
// TODO: parametrize
|
||||||
|
val smemBanks = Seq.tabulate(numLanes) { bankId =>
|
||||||
|
// Banked-by-word (4 bytes)
|
||||||
|
// base for bank 1: ff...000000|01|00
|
||||||
|
// mask for bank 1; 00...111111|00|11
|
||||||
|
val base = 0xff000000L | (bankId * 4 /*wordSize*/ )
|
||||||
|
val mask = 0x00ffffffL ^ ((numLanes - 1) * 4 /*wordSize*/ )
|
||||||
|
LazyModule(new TLRAM(AddressSet(base, mask), beatBytes = 4 /*wordSize*/ ))
|
||||||
|
}
|
||||||
|
// smem lanes-to-banks crossbar
|
||||||
|
val smemXbar = LazyModule(new TLXbar)
|
||||||
|
smemNodes.foreach(smemXbar.node := _)
|
||||||
|
smemBanks.foreach(_.node := smemXbar.node)
|
||||||
|
|
||||||
if (vortexParams.useVxCache) {
|
if (vortexParams.useVxCache) {
|
||||||
tlMasterXbar.node := TLWidthWidget(16) := memNode
|
tlMasterXbar.node := TLWidthWidget(16) := memNode
|
||||||
} else {
|
} else {
|
||||||
tlMasterXbar.node :=* l1Node
|
// imemNodes.foreach { tlMasterXbar.node := TLWidthWidget(4) := _ }
|
||||||
|
tlMasterXbar.node :=* icacheNode
|
||||||
|
tlMasterXbar.node :=* dcacheNode
|
||||||
}
|
}
|
||||||
|
|
||||||
/* below are copied from rocket */
|
/* below are copied from rocket */
|
||||||
@@ -457,95 +511,170 @@ class VortexTileModuleImp(outer: VortexTile) extends BaseTileModuleImp(outer) {
|
|||||||
outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq
|
outer.memNode.out(0)._1.a <> memTLAdapter.io.outReq
|
||||||
memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d
|
memTLAdapter.io.outResp <> outer.memNode.out(0)._1.d
|
||||||
} else {
|
} else {
|
||||||
val imemTLAdapter = Module(
|
def connectImem = {
|
||||||
new VortexTLAdapter(
|
val imemTLAdapter = Module(
|
||||||
outer.imemSourceWidth,
|
|
||||||
chiselTypeOf(core.io.imem.get(0).a.bits),
|
|
||||||
chiselTypeOf(core.io.imem.get(0).d.bits),
|
|
||||||
outer.imemNodes.head.out.head
|
|
||||||
)
|
|
||||||
)
|
|
||||||
// TODO: make imemNodes not a vector
|
|
||||||
imemTLAdapter.io.inReq <> core.io.imem.get(0).a
|
|
||||||
core.io.imem.get(0).d <> imemTLAdapter.io.inResp
|
|
||||||
outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq
|
|
||||||
imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d
|
|
||||||
|
|
||||||
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
|
||||||
// up some area
|
|
||||||
val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
|
|
||||||
val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
|
||||||
Module(
|
|
||||||
new VortexTLAdapter(
|
new VortexTLAdapter(
|
||||||
outer.dmemSourceWidth,
|
outer.imemSourceWidth,
|
||||||
chiselTypeOf(core.io.dmem.get(0).a.bits),
|
chiselTypeOf(core.io.imem.get(0).a.bits),
|
||||||
chiselTypeOf(core.io.dmem.get(0).d.bits),
|
chiselTypeOf(core.io.imem.get(0).d.bits),
|
||||||
outer.dmemNodes(0).out.head
|
outer.imemNodes.head.out.head
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
// TODO: make imemNodes not a vector
|
||||||
|
imemTLAdapter.io.inReq <> core.io.imem.get(0).a
|
||||||
|
core.io.imem.get(0).d <> imemTLAdapter.io.inResp
|
||||||
|
outer.imemNodes(0).out(0)._1.a <> imemTLAdapter.io.outReq
|
||||||
|
imemTLAdapter.io.outResp <> outer.imemNodes(0).out(0)._1.d
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since the individual per-lane TL requests might come back out-of-sync between
|
def connectDmem = {
|
||||||
// the lanes, but Vortex core expects the per-lane responses to be synced,
|
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||||
// we need to selectively fire responses that have the same source, and
|
// up some area
|
||||||
// delay others.
|
val dmemTLBundles = outer.dmemNodes.map(_.out.head._1)
|
||||||
//
|
val dmemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
||||||
// In order to do that, we pick a source from one of the valid lanes using e.g.
|
Module(
|
||||||
// an arbiter. Then using the chosen source id, we
|
new VortexTLAdapter(
|
||||||
// - lie to core that response is not valid if source doesn't match picked, and
|
outer.dmemSourceWidth,
|
||||||
// - lie to downstream that core is not ready if source doesn't match picked.
|
new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32),
|
||||||
//
|
new VortexBundleD(tagWidth = outer.dmemTagWidth, dataWidth = 32),
|
||||||
// Note that we cannot do this filtering logic using TileLink source ID, because
|
outer.dmemNodes(0).out.head
|
||||||
// we're allocating source for each lane independently. In that case, it's
|
)
|
||||||
// possible that lane 0's source matches lane 1/2/3's source by chance,
|
)
|
||||||
// even when they originated from different warps. Using Vortex's dcache req tag
|
}
|
||||||
// solves this issue because they use a UUID that is unique across all requests
|
|
||||||
// in the program.
|
// Since the individual per-lane TL requests might come back out-of-sync between
|
||||||
//
|
// the lanes, but Vortex core expects the per-lane responses to be synced,
|
||||||
// TODO: A cleaner solution would be to simply do a synchronized allocation
|
// we need to selectively fire responses that have the same source, and
|
||||||
// of the same source id for all lanes.
|
// delay others.
|
||||||
val arb = Module(
|
//
|
||||||
new RRArbiter(
|
// In order to do that, we pick a source from one of the valid lanes using e.g.
|
||||||
core.io.dmem.get.head.d.bits.source.cloneType,
|
// an arbiter. Then using the chosen source id, we
|
||||||
outer.numLanes
|
// - lie to core that response is not valid if source doesn't match picked, and
|
||||||
|
// - lie to downstream that core is not ready if source doesn't match picked.
|
||||||
|
//
|
||||||
|
// Note that we cannot do this filtering logic using TileLink source ID, because
|
||||||
|
// we're allocating source for each lane independently. In that case, it's
|
||||||
|
// possible that lane 0's source matches lane 1/2/3's source by chance,
|
||||||
|
// even when they originated from different warps. Using Vortex's dcache req tag
|
||||||
|
// solves this issue because they use a UUID that is unique across all requests
|
||||||
|
// in the program.
|
||||||
|
//
|
||||||
|
// TODO: A cleaner solution would be to simply do a synchronized allocation
|
||||||
|
// of the same source id for all lanes.
|
||||||
|
val arb = Module(
|
||||||
|
new RRArbiter(
|
||||||
|
// FIXME: should really be source on D channel
|
||||||
|
new VortexBundleA(tagWidth = outer.dmemTagWidth, dataWidth = 32).source.cloneType,
|
||||||
|
outer.numLanes
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
arb.io.out.ready := true.B
|
||||||
arb.io.out.ready := true.B
|
val dmemBundles = dmemTLAdapters.map(_.io.inResp)
|
||||||
val dmemBundles = dmemTLAdapters.map(_.io.inResp)
|
(arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) =>
|
||||||
(arb.io.in zip dmemBundles).foreach { case (arbIn, vxDmem) =>
|
arbIn.valid := vxDmem.valid
|
||||||
arbIn.valid := vxDmem.valid
|
arbIn.bits := vxDmem.bits.source
|
||||||
arbIn.bits := vxDmem.bits.source
|
}
|
||||||
}
|
val matchingSources = Wire(UInt(outer.numLanes.W))
|
||||||
val matchingSources = Wire(UInt(outer.numLanes.W))
|
matchingSources := dmemBundles
|
||||||
matchingSources := dmemBundles
|
.map(b =>
|
||||||
.map(b =>
|
// If there is no valid response pending across all lanes,
|
||||||
// If there is no valid response pending across all lanes,
|
// matchingSources should not filter out upstream ready signals, so
|
||||||
// matchingSources should not filter out upstream ready signals, so
|
// set it to all-1
|
||||||
// set it to all-1
|
!arb.io.out.valid || (b.bits.source === arb.io.out.bits)
|
||||||
!arb.io.out.valid || (b.bits.source === arb.io.out.bits)
|
)
|
||||||
)
|
.asUInt
|
||||||
.asUInt
|
|
||||||
|
|
||||||
// make connection:
|
// make connection:
|
||||||
// VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes
|
// VortexBundle <--> sourceId filter <--> VortexTLAdapter <--> dmemNodes
|
||||||
(core.io.dmem.get zip dmemTLAdapters) foreach { case (coreMem, tlAdapter) =>
|
//
|
||||||
tlAdapter.io.inReq <> coreMem.a
|
// Chisel doesn't support 2-D array in BlackBox interface to Verilog, so
|
||||||
coreMem.d <> tlAdapter.io.inResp
|
// need to flatten everything.
|
||||||
}
|
dmemTLAdapters.zipWithIndex.foreach {
|
||||||
(core.io.dmem.get zip dmemTLAdapters).zipWithIndex.foreach {
|
case (tlAdapter, i) =>
|
||||||
case ((coreMem, tlAdapter), i) =>
|
// tlAdapter.io.inReq <> coreMem.a
|
||||||
coreMem.d.valid := tlAdapter.io.inResp.valid && matchingSources(i)
|
tlAdapter.io.inReq.valid := core.io.dmem_a_valid(i)
|
||||||
tlAdapter.io.inResp.ready := coreMem.d.ready && matchingSources(i)
|
tlAdapter.io.inReq.bits.opcode := core.io.dmem_a_bits_opcode(3 * (i + 1) - 1, 3 * i)
|
||||||
}
|
tlAdapter.io.inReq.bits.size := core.io.dmem_a_bits_size(4 * (i + 1) - 1, 4 * i)
|
||||||
(dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) =>
|
tlAdapter.io.inReq.bits.source := core.io.dmem_a_bits_source(outer.dmemTagWidth * (i + 1) - 1, outer.dmemTagWidth * i)
|
||||||
tlOut.a <> tlAdapter.io.outReq
|
tlAdapter.io.inReq.bits.address := core.io.dmem_a_bits_address(32 * (i + 1) - 1, 32 * i)
|
||||||
tlAdapter.io.outResp <> tlOut.d
|
tlAdapter.io.inReq.bits.mask := core.io.dmem_a_bits_mask(4 * (i + 1) - 1, 4 * i)
|
||||||
|
tlAdapter.io.inReq.bits.data := core.io.dmem_a_bits_data(32 * (i + 1) - 1, 32 * i)
|
||||||
|
}
|
||||||
|
core.io.dmem_a_ready := dmemTLAdapters.map(_.io.inReq.ready).asUInt
|
||||||
|
|
||||||
|
core.io.dmem_d_valid := dmemTLAdapters.map(_.io.inResp.valid).asUInt
|
||||||
|
core.io.dmem_d_bits_opcode := dmemTLAdapters.map(_.io.inResp.bits.opcode).asUInt
|
||||||
|
core.io.dmem_d_bits_size := dmemTLAdapters.map(_.io.inResp.bits.size).asUInt
|
||||||
|
core.io.dmem_d_bits_source := dmemTLAdapters.map(_.io.inResp.bits.source).asUInt
|
||||||
|
core.io.dmem_d_bits_data := dmemTLAdapters.map(_.io.inResp.bits.data).asUInt
|
||||||
|
|
||||||
|
// override response channel with matchingSources
|
||||||
|
val dmem_d_valid_vec = Wire(Vec(outer.numLanes, Bool()))
|
||||||
|
dmemTLAdapters.zipWithIndex.foreach {
|
||||||
|
case (tlAdapter, i) =>
|
||||||
|
dmem_d_valid_vec(i) := tlAdapter.io.inResp.valid && matchingSources(i)
|
||||||
|
tlAdapter.io.inResp.ready := core.io.dmem_d_ready(i) && matchingSources(i)
|
||||||
|
}
|
||||||
|
core.io.dmem_d_valid := dmem_d_valid_vec.asUInt
|
||||||
|
|
||||||
|
(dmemTLAdapters zip dmemTLBundles) foreach { case (tlAdapter, tlOut) =>
|
||||||
|
tlOut.a <> tlAdapter.io.outReq
|
||||||
|
tlAdapter.io.outResp <> tlOut.d
|
||||||
|
}
|
||||||
|
|
||||||
|
outer.dmemAggregateNode.out.foreach { bo =>
|
||||||
|
dontTouch(bo._1.a)
|
||||||
|
dontTouch(bo._1.d)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
outer.dmemAggregateNode.out.foreach { bo =>
|
def connectSmem = {
|
||||||
dontTouch(bo._1.a)
|
// @perf: this would duplicate SourceGenerator table for every lane and eat
|
||||||
dontTouch(bo._1.d)
|
// up some area
|
||||||
|
val smemTLBundles = outer.smemNodes.map(_.out.head._1)
|
||||||
|
val smemTLAdapters = Seq.tabulate(outer.numLanes) { _ =>
|
||||||
|
Module(
|
||||||
|
new VortexTLAdapter(
|
||||||
|
outer.smemSourceWidth,
|
||||||
|
new VortexBundleA(tagWidth = outer.smemTagWidth, dataWidth = 32),
|
||||||
|
new VortexBundleD(tagWidth = outer.smemTagWidth, dataWidth = 32),
|
||||||
|
outer.smemNodes(0).out.head
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
smemTLAdapters.zipWithIndex.foreach {
|
||||||
|
case (tlAdapter, i) =>
|
||||||
|
// tlAdapter.io.inReq <> coreMem.a
|
||||||
|
tlAdapter.io.inReq.valid := core.io.smem_a_valid(i)
|
||||||
|
tlAdapter.io.inReq.bits.opcode := core.io.smem_a_bits_opcode(3 * (i + 1) - 1, 3 * i)
|
||||||
|
tlAdapter.io.inReq.bits.size := core.io.smem_a_bits_size(4 * (i + 1) - 1, 4 * i)
|
||||||
|
tlAdapter.io.inReq.bits.source := core.io.smem_a_bits_source(outer.smemTagWidth * (i + 1) - 1, outer.smemTagWidth * i)
|
||||||
|
tlAdapter.io.inReq.bits.address := core.io.smem_a_bits_address(32 * (i + 1) - 1, 32 * i)
|
||||||
|
tlAdapter.io.inReq.bits.mask := core.io.smem_a_bits_mask(4 * (i + 1) - 1, 4 * i)
|
||||||
|
tlAdapter.io.inReq.bits.data := core.io.smem_a_bits_data(32 * (i + 1) - 1, 32 * i)
|
||||||
|
}
|
||||||
|
core.io.smem_a_ready := smemTLAdapters.map(_.io.inReq.ready).asUInt
|
||||||
|
|
||||||
|
core.io.smem_d_valid := smemTLAdapters.map(_.io.inResp.valid).asUInt
|
||||||
|
core.io.smem_d_bits_opcode := smemTLAdapters.map(_.io.inResp.bits.opcode).asUInt
|
||||||
|
core.io.smem_d_bits_size := smemTLAdapters.map(_.io.inResp.bits.size).asUInt
|
||||||
|
core.io.smem_d_bits_source := smemTLAdapters.map(_.io.inResp.bits.source).asUInt
|
||||||
|
core.io.smem_d_bits_data := smemTLAdapters.map(_.io.inResp.bits.data).asUInt
|
||||||
|
smemTLAdapters.zipWithIndex.foreach {
|
||||||
|
case (tlAdapter, i) =>
|
||||||
|
tlAdapter.io.inResp.ready := core.io.smem_d_ready(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
(smemTLAdapters zip smemTLBundles) foreach { case (tlAdapter, tlOut) =>
|
||||||
|
tlOut.a <> tlAdapter.io.outReq
|
||||||
|
tlAdapter.io.outResp <> tlOut.d
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
connectImem
|
||||||
|
connectDmem
|
||||||
|
connectSmem
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: generalize for useVxCache
|
// TODO: generalize for useVxCache
|
||||||
@@ -566,7 +695,7 @@ class VortexTLAdapter(
|
|||||||
val inResp = Decoupled(inRespT)
|
val inResp = Decoupled(inRespT)
|
||||||
val outResp = chiselTypeOf(outTL._1.d)
|
val outResp = chiselTypeOf(outTL._1.d)
|
||||||
})
|
})
|
||||||
val edge = outTL._2
|
val (bundle, edge) = outTL
|
||||||
val sourceGen = Module(
|
val sourceGen = Module(
|
||||||
new SourceGenerator(
|
new SourceGenerator(
|
||||||
newSourceWidth,
|
newSourceWidth,
|
||||||
@@ -587,14 +716,20 @@ class VortexTLAdapter(
|
|||||||
io.outReq.bits.size := io.inReq.bits.size
|
io.outReq.bits.size := io.inReq.bits.size
|
||||||
io.outReq.bits.source := io.inReq.bits.source
|
io.outReq.bits.source := io.inReq.bits.source
|
||||||
io.outReq.bits.address := io.inReq.bits.address
|
io.outReq.bits.address := io.inReq.bits.address
|
||||||
// generate TL-correct mask
|
// Get requires contiguous mask; only copy core's potentially-partial mask
|
||||||
io.outReq.bits.mask := edge.mask(io.inReq.bits.address, io.inReq.bits.size)
|
// when writing
|
||||||
|
io.outReq.bits.mask := Mux(
|
||||||
|
edge.hasData(io.outReq.bits),
|
||||||
|
io.inReq.bits.mask,
|
||||||
|
// generate TL-correct mask
|
||||||
|
edge.mask(io.inReq.bits.address, io.inReq.bits.size)
|
||||||
|
)
|
||||||
io.outReq.bits.data := io.inReq.bits.data
|
io.outReq.bits.data := io.inReq.bits.data
|
||||||
io.outReq.bits.corrupt := 0.U
|
io.outReq.bits.corrupt := 0.U
|
||||||
io.inReq.ready := io.outReq.ready
|
io.inReq.ready := io.outReq.ready
|
||||||
// VortexBundleD <> TLBundleD
|
// VortexBundleD <> TLBundleD
|
||||||
// Do not reply to write requests; Vortex core does not expect ack on writes
|
// Filtering out write requests is handled inside the wrapper Verilog
|
||||||
io.inResp.valid := io.outResp.valid && edge.hasData(io.outResp.bits)
|
io.inResp.valid := io.outResp.valid
|
||||||
io.inResp.bits.opcode := io.outResp.bits.opcode
|
io.inResp.bits.opcode := io.outResp.bits.opcode
|
||||||
io.inResp.bits.size := io.outResp.bits.size
|
io.inResp.bits.size := io.outResp.bits.size
|
||||||
io.inResp.bits.source := io.outResp.bits.source
|
io.inResp.bits.source := io.outResp.bits.source
|
||||||
|
|||||||
Reference in New Issue
Block a user