adding sockets support to simx and cache subsystem refactoring

minor update

minor update

minor updates
This commit is contained in:
Blaise Tine
2023-12-20 11:57:44 -08:00
parent 914b680aed
commit c7a81d1493
24 changed files with 541 additions and 388 deletions

View File

@@ -24,14 +24,38 @@ Cluster::Cluster(const SimContext& ctx,
, mem_req_port(this)
, mem_rsp_port(this)
, cluster_id_(cluster_id)
, cores_(arch.num_cores())
, sockets_(NUM_SOCKETS)
, barriers_(arch.num_barriers(), 0)
, sharedmems_(arch.num_cores())
, processor_(processor)
, cores_per_socket_(arch.socket_size())
{
auto num_cores = arch.num_cores();
char sname[100];
auto sockets_per_cluster = sockets_.size();
// create sockets
snprintf(sname, 100, "cluster%d-icache-arb", cluster_id);
auto icache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
snprintf(sname, 100, "cluster%d-dcache-arb", cluster_id);
auto dcache_switch = MemSwitch::Create(sname, ArbiterType::RoundRobin, sockets_per_cluster);
for (uint32_t i = 0; i < sockets_per_cluster; ++i) {
uint32_t socket_id = cluster_id * sockets_per_cluster + i;
auto socket = Socket::Create(socket_id, this, arch, dcrs);
socket->icache_mem_req_port.bind(&icache_switch->ReqIn.at(i));
icache_switch->RspIn.at(i).bind(&socket->icache_mem_rsp_port);
socket->dcache_mem_req_port.bind(&dcache_switch->ReqIn.at(i));
dcache_switch->RspIn.at(i).bind(&socket->dcache_mem_rsp_port);
sockets_.at(i) = socket;
}
// Create l2cache
snprintf(sname, 100, "cluster%d-l2cache", cluster_id);
l2cache_ = CacheSim::Create(sname, CacheSim::Config{
!L2_ENABLED,
@@ -42,7 +66,7 @@ Cluster::Cluster(const SimContext& ctx,
log2ceil(L2_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
5, // request size
2, // request size
true, // write-through
false, // write response
L2_MSHR_SIZE, // mshr
@@ -52,87 +76,11 @@ Cluster::Cluster(const SimContext& ctx,
l2cache_->MemReqPort.bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPort);
snprintf(sname, 100, "cluster%d-icaches", cluster_id);
icaches_ = CacheCluster::Create(sname, num_cores, NUM_ICACHES, 1, CacheSim::Config{
!ICACHE_ENABLED,
log2ceil(ICACHE_SIZE), // C
log2ceil(L1_LINE_SIZE), // L
log2ceil(sizeof(uint32_t)), // W
log2ceil(ICACHE_NUM_WAYS),// A
1, // B
XLEN, // address bits
1, // number of ports
1, // number of inputs
true, // write-through
false, // write response
(uint8_t)arch.num_warps(), // mshr
2, // pipeline latency
});
icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
icaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icaches_->MemRspPort);
snprintf(sname, 100, "cluster%d-dcaches", cluster_id);
dcaches_ = CacheCluster::Create(sname, num_cores, NUM_DCACHES, NUM_LSU_LANES, CacheSim::Config{
!DCACHE_ENABLED,
log2ceil(DCACHE_SIZE), // C
log2ceil(L1_LINE_SIZE), // L
log2ceil(sizeof(Word)), // W
log2ceil(DCACHE_NUM_WAYS),// A
log2ceil(DCACHE_NUM_BANKS), // B
XLEN, // address bits
1, // number of ports
DCACHE_NUM_BANKS, // number of inputs
true, // write-through
false, // write response
DCACHE_MSHR_SIZE, // mshr
4, // pipeline latency
});
dcaches_->MemReqPort.bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcaches_->MemRspPort);
///////////////////////////////////////////////////////////////////////////
// create shared memory blocks
for (uint32_t i = 0; i < num_cores; ++i) {
snprintf(sname, 100, "cluster%d-shared_mem%d", cluster_id, i);
sharedmems_.at(i) = SharedMem::Create(sname, SharedMem::Config{
(1 << SMEM_LOG_SIZE),
sizeof(Word),
NUM_LSU_LANES,
NUM_LSU_LANES,
false
});
}
// create cores
for (uint32_t i = 0; i < num_cores; ++i) {
uint32_t core_id = cluster_id * num_cores + i;
cores_.at(i) = Core::Create(core_id,
this,
arch,
dcrs,
sharedmems_.at(i));
cores_.at(i)->icache_req_ports.at(0).bind(&icaches_->CoreReqPorts.at(i).at(0));
icaches_->CoreRspPorts.at(i).at(0).bind(&cores_.at(i)->icache_rsp_ports.at(0));
for (uint32_t j = 0; j < NUM_LSU_LANES; ++j) {
snprintf(sname, 100, "cluster%d-smem_demux%d_%d", cluster_id, i, j);
auto smem_demux = SMemDemux::Create(sname);
cores_.at(i)->dcache_req_ports.at(j).bind(&smem_demux->ReqIn);
smem_demux->RspIn.bind(&cores_.at(i)->dcache_rsp_ports.at(j));
smem_demux->ReqDC.bind(&dcaches_->CoreReqPorts.at(i).at(j));
dcaches_->CoreRspPorts.at(i).at(j).bind(&smem_demux->RspDC);
smem_demux->ReqSM.bind(&sharedmems_.at(i)->Inputs.at(j));
sharedmems_.at(i)->Outputs.at(j).bind(&smem_demux->RspSM);
}
}
dcache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(1));
l2cache_->CoreRspPorts.at(1).bind(&dcache_switch->RspOut.at(0));
}
Cluster::~Cluster() {
@@ -150,14 +98,14 @@ void Cluster::tick() {
}
void Cluster::attach_ram(RAM* ram) {
for (auto core : cores_) {
core->attach_ram(ram);
for (auto& socket : sockets_) {
socket->attach_ram(ram);
}
}
bool Cluster::running() const {
for (auto& core : cores_) {
if (core->running())
for (auto& socket : sockets_) {
if (socket->running())
return true;
}
return false;
@@ -166,9 +114,9 @@ bool Cluster::running() const {
bool Cluster::check_exit(Word* exitcode, bool riscv_test) const {
bool done = true;
Word exitcode_ = 0;
for (auto& core : cores_) {
for (auto& socket : sockets_) {
Word ec;
if (core->check_exit(&ec, riscv_test)) {
if (socket->check_exit(&ec, riscv_test)) {
exitcode_ |= ec;
} else {
done = false;
@@ -181,36 +129,32 @@ bool Cluster::check_exit(Word* exitcode, bool riscv_test) const {
void Cluster::barrier(uint32_t bar_id, uint32_t count, uint32_t core_id) {
auto& barrier = barriers_.at(bar_id);
uint32_t local_core_id = core_id % cores_.size();
auto sockets_per_cluster = sockets_.size();
auto cores_per_socket = cores_per_socket_;
uint32_t cores_per_cluster = sockets_per_cluster * cores_per_socket;
uint32_t local_core_id = core_id % cores_per_cluster;
barrier.set(local_core_id);
DP(3, "*** Suspend core #" << core_id << " at barrier #" << bar_id);
if (barrier.count() == (size_t)count) {
// resume all suspended cores
for (uint32_t i = 0; i < cores_.size(); ++i) {
if (barrier.test(i)) {
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
cores_.at(i)->resume();
for (uint32_t s = 0; s < sockets_per_cluster; ++s) {
for (uint32_t c = 0; c < cores_per_socket; ++c) {
uint32_t i = s * cores_per_socket + c;
if (barrier.test(i)) {
DP(3, "*** Resume core #" << i << " at barrier #" << bar_id);
sockets_.at(s)->resume(c);
}
}
}
barrier.reset();
}
}
ProcessorImpl* Cluster::processor() const {
return processor_;
}
Cluster::PerfStats Cluster::perf_stats() const {
Cluster::PerfStats perf;
perf.icache = icaches_->perf_stats();
perf.dcache = dcaches_->perf_stats();
perf.l2cache = l2cache_->perf_stats();
for (auto sharedmem : sharedmems_) {
perf.sharedmem += sharedmem->perf_stats();
}
return perf;
}