Fixes: texture unit memory accesses sometimes going to smem; bilinear texture filtering. New: cache req_id, …

This commit is contained in:
Blaise Tine
2021-11-24 00:00:17 -05:00
parent 1501360f4b
commit 18762dffce
70 changed files with 3818 additions and 1727 deletions

View File

@@ -13,6 +13,7 @@ struct params_t {
uint32_t sets_per_bank;
uint32_t blocks_per_set;
uint32_t words_per_block;
uint32_t log2_num_inputs;
uint32_t word_select_addr_start;
uint32_t word_select_addr_end;
@@ -31,8 +32,10 @@ struct params_t {
uint32_t offset_bits = config.B - config.W;
uint32_t log2_bank_size = config.C - bank_bits;
uint32_t index_bits = log2_bank_size - (config.B << config.A);
assert(log2_bank_size >= config.B);
assert(log2_bank_size >= config.B);
this->log2_num_inputs = log2ceil(config.num_inputs);
this->words_per_block = 1 << offset_bits;
this->blocks_per_set = 1 << config.A;
this->sets_per_bank = 1 << index_bits;
@@ -104,7 +107,7 @@ struct set_t {
// Per-port bookkeeping attached to a bank request: records which core input
// issued the request on that port so a response can be routed back to it.
struct bank_req_info_t {
// true when this port slot holds a request (checked to detect port conflicts)
bool valid;
// index of the core input port; used to select the CoreRspPorts entry
uint32_t req_id;
// tag copied from the originating core request and echoed in the response
uint32_t req_tag;
uint64_t req_tag;
};
struct bank_req_t {
@@ -194,7 +197,7 @@ public:
return root_entry;
}
bool try_pop(bank_req_t* out) {
bool pop(bank_req_t* out) {
for (auto& entry : entries_) {
if (entry.valid && entry.mshr_replay) {
*out = entry;
@@ -208,16 +211,13 @@ public:
};
// State owned by a single cache bank.
struct bank_t {
std::vector<set_t> sets;
MSHR mshr;
std::queue<bank_req_t> stall_buffer;
bank_req_t active_req;
std::vector<set_t> sets;
MSHR mshr;
// Sizes the bank from the cache configuration and the derived parameters:
// params.sets_per_bank sets, each built from params.blocks_per_set, and an
// MSHR constructed with config.mshr_size.
bank_t(const CacheConfig& config,
const params_t& params)
: sets(params.sets_per_bank, params.blocks_per_set)
, mshr(config.mshr_size)
, active_req(config.ports_per_bank)
{}
};
@@ -229,8 +229,8 @@ private:
CacheConfig config_;
params_t params_;
std::vector<bank_t> banks_;
std::vector<std::queue<uint32_t>> core_rsps_;
Switch<MemReq, MemRsp>::Ptr mem_switch_;
Switch<MemReq, MemRsp>::Ptr mem_switch_;
Switch<MemReq, MemRsp>::Ptr bypass_switch_;
std::vector<MasterPort<MemReq>> mem_req_ports_;
std::vector<SlavePort<MemRsp>> mem_rsp_ports_;
@@ -240,241 +240,270 @@ public:
, config_(config)
, params_(config)
, banks_(config.num_banks, {config, params_})
, core_rsps_(config.num_inputs)
, mem_req_ports_(config.num_banks, simobject)
, mem_rsp_ports_(config.num_banks, simobject)
{
bypass_switch_ = Switch<MemReq, MemRsp>::Create("bypass_arb", ArbiterType::Priority, 2);
bypass_switch_->ReqOut.bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&bypass_switch_->RspIn);
if (config.num_banks > 1) {
mem_switch_ = Switch<MemReq, MemRsp>::Create("mem_arb", ArbiterType::RoundRobin, config.num_banks);
for (uint32_t i = 0, n = config.num_banks; i < n; ++i) {
mem_req_ports_.at(i).bind(&mem_switch_->ReqIn.at(i));
mem_switch_->RspOut.at(i).bind(&mem_rsp_ports_.at(i));
}
mem_switch_->ReqOut.bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&mem_switch_->RspIn);
mem_switch_->ReqOut.bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspOut.at(0).bind(&mem_switch_->RspIn);
} else {
mem_req_ports_.at(0).bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&mem_rsp_ports_.at(0));
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspOut.at(0).bind(&mem_rsp_ports_.at(0));
}
}
void step(uint64_t /*cycle*/) {
// process core response
for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
auto& core_rsp = core_rsps_.at(req_id);
if (!core_rsp.empty()) {
simobject_->CoreRspPorts.at(req_id).send(MemRsp{core_rsp.front()}, config_.latency);
core_rsp.pop();
}
// handle bypass responses
auto& bypass_port = bypass_switch_->RspOut.at(1);
if (!bypass_port.empty()) {
auto& mem_rsp = bypass_port.top();
uint32_t req_id = mem_rsp.tag & ((1 << params_.log2_num_inputs)-1);
uint64_t tag = mem_rsp.tag >> params_.log2_num_inputs;
MemRsp core_rsp(tag);
simobject_->CoreRspPorts.at(req_id).send(core_rsp, config_.latency);
bypass_port.pop();
}
for (auto& bank : banks_) {
auto& active_req = bank.active_req;
std::vector<bank_req_t> pipeline_reqs(config_.num_banks, config_.ports_per_bank);
// try schedule mshr replay
if (!active_req.valid) {
bank.mshr.try_pop(&active_req);
}
// try schedule stall queue if MSHR has space
if (!active_req.valid
&& !bank.stall_buffer.empty()
&& !bank.mshr.full()) {
active_req = bank.stall_buffer.front();
bank.stall_buffer.pop();
}
}
// handle MSHR replay
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs.at(bank_id);
bank.mshr.pop(&pipeline_req);
}
// handle memory fills
for (uint32_t i = 0, n = config_.num_banks; i < n; ++i) {
MemRsp mem_rsp;
if (mem_rsp_ports_.at(i).read(&mem_rsp)) {
this->processMemoryFill(i, mem_rsp.tag);
std::vector<bool> pending_fill_req(config_.num_banks, false);
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& mem_rsp_port = mem_rsp_ports_.at(bank_id);
if (!mem_rsp_port.empty()) {
auto& mem_rsp = mem_rsp_port.top();
this->processMemoryFill(bank_id, mem_rsp.tag);
pending_fill_req.at(bank_id) = true;
mem_rsp_port.pop();
}
}
// handle incoming core requests
for (uint32_t i = 0, n = config_.num_inputs; i < n; ++i) {
MemReq core_req;
if (!simobject_->CoreReqPorts.at(i).read(&core_req))
for (uint32_t req_id = 0, n = config_.num_inputs; req_id < n; ++req_id) {
auto& core_req_port = simobject_->CoreReqPorts.at(req_id);
if (core_req_port.empty())
continue;
auto bank_id = params_.addr_bank_id(core_req.addr);
auto set_id = params_.addr_set_id(core_req.addr);
auto tag = params_.addr_tag(core_req.addr);
auto port_id = i % config_.ports_per_bank;
auto& core_req = core_req_port.top();
// check cache bypassing
if (core_req.is_io) {
// send IO request
this->processIORequest(core_req, req_id);
// remove request
core_req_port.pop();
continue;
}
auto bank_id = params_.addr_bank_id(core_req.addr);
auto set_id = params_.addr_set_id(core_req.addr);
auto tag = params_.addr_tag(core_req.addr);
auto port_id = req_id % config_.ports_per_bank;
// create abnk request
// create bank request
bank_req_t bank_req(config_.ports_per_bank);
bank_req.valid = true;
bank_req.write = core_req.write;
bank_req.mshr_replay = false;
bank_req.tag = tag;
bank_req.set_id = set_id;
bank_req.infos.at(port_id) = {true, i, core_req.tag};
bank_req.infos.at(port_id) = {true, req_id, core_req.tag};
auto& bank = banks_.at(bank_id);
// check MSHR capacity
if (bank.mshr.full()) {
// add to stall buffer
bank.stall_buffer.emplace(bank_req);
auto& bank = banks_.at(bank_id);
auto& pipeline_req = pipeline_reqs.at(bank_id);
// check pending MSHR replay
if (pipeline_req.valid
&& pipeline_req.mshr_replay) {
// stall
continue;
}
// check pending fill request
if (pending_fill_req.at(bank_id)) {
// stall
continue;
}
auto& active_req = bank.active_req;
// check pending MSHR request
if (active_req.valid
&& active_req.mshr_replay) {
// add to stall buffer
bank.stall_buffer.emplace(bank_req);
// check MSHR capacity if read or writeback
if ((!core_req.write || !config_.write_through)
&& bank.mshr.full()) {
// stall
continue;
}
}
// check bank conflicts
if (active_req.valid) {
if (pipeline_req.valid) {
// check port conflict
if (active_req.write != core_req.write
|| active_req.set_id != set_id
|| active_req.tag != tag
|| active_req.infos[port_id].valid) {
// add to stall buffer
bank.stall_buffer.emplace(bank_req);
if (pipeline_req.write != core_req.write
|| pipeline_req.set_id != set_id
|| pipeline_req.tag != tag
|| pipeline_req.infos[port_id].valid) {
// stall
continue;
}
// update pending request infos
active_req.infos[port_id] = bank_req.infos[port_id];
pipeline_req.infos[port_id] = bank_req.infos[port_id];
} else {
// schedule new request
active_req = bank_req;
pipeline_req = bank_req;
}
// remove request
core_req_port.pop();
}
// process active request
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
this->processBankRequest(bank_id);
// process active request
this->processBankRequest(pipeline_reqs);
}
void processIORequest(const MemReq& core_req, uint32_t req_id) {
{
MemReq mem_req(core_req);
mem_req.tag = (core_req.tag << params_.log2_num_inputs) + req_id;
bypass_switch_->ReqIn.at(1).send(mem_req, 1);
}
if (core_req.write && config_.write_reponse) {
simobject_->CoreRspPorts.at(req_id).send(MemRsp{core_req.tag}, 1);
}
}
// Applies a memory fill response to bank `bank_id`.
//
// bank_id: index into banks_ of the bank that received the response.
// mshr_id: value passed to MSHR::replay(); step() passes the memory
//          response's tag here.
//
// The entry returned by replay() supplies the set_id and block_id of the
// block to fill; that block is marked valid and given the entry's tag.
void processMemoryFill(uint32_t bank_id, uint32_t mshr_id) {
// update block
auto& bank = banks_.at(bank_id);
auto& root_entry = bank.mshr.replay(mshr_id);
auto& set = bank.sets.at(root_entry.set_id);
auto& block = set.blocks.at(root_entry.block_id);
auto& bank = banks_.at(bank_id);
auto& entry = bank.mshr.replay(mshr_id);
auto& set = bank.sets.at(entry.set_id);
auto& block = set.blocks.at(entry.block_id);
block.valid = true;
block.tag = root_entry.tag;
block.tag = entry.tag;
}
void processBankRequest(uint32_t bank_id) {
auto& bank = banks_.at(bank_id);
auto& active_req = bank.active_req;
if (!active_req.valid)
return;
void processBankRequest(const std::vector<bank_req_t>& pipeline_reqs) {
for (uint32_t bank_id = 0, n = config_.num_banks; bank_id < n; ++bank_id) {
auto& pipeline_req = pipeline_reqs.at(bank_id);
if (!pipeline_req.valid)
continue;
active_req.valid = false;
auto& bank = banks_.at(bank_id);
auto& set = bank.sets.at(pipeline_req.set_id);
auto& set = bank.sets.at(active_req.set_id);
if (active_req.mshr_replay) {
// send core response
for (auto& info : active_req.infos) {
core_rsps_.at(info.req_id).emplace(info.req_tag);
}
} else {
bool hit = false;
bool found_free_block = false;
int hit_block_id = 0;
int repl_block_id = 0;
uint32_t max_cnt = 0;
for (int i = 0, n = set.blocks.size(); i < n; ++i) {
auto& block = set.blocks.at(i);
if (block.valid) {
if (block.tag == active_req.tag) {
block.lru_ctr = 0;
hit_block_id = i;
hit = true;
} else {
++block.lru_ctr;
}
if (max_cnt < block.lru_ctr) {
max_cnt = block.lru_ctr;
if (pipeline_req.mshr_replay) {
// send core response
for (auto& info : pipeline_req.infos) {
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
}
} else {
bool hit = false;
bool found_free_block = false;
int hit_block_id = 0;
int repl_block_id = 0;
uint32_t max_cnt = 0;
for (int i = 0, n = set.blocks.size(); i < n; ++i) {
auto& block = set.blocks.at(i);
if (block.valid) {
if (block.tag == pipeline_req.tag) {
block.lru_ctr = 0;
hit_block_id = i;
hit = true;
} else {
++block.lru_ctr;
}
if (max_cnt < block.lru_ctr) {
max_cnt = block.lru_ctr;
repl_block_id = i;
}
} else {
found_free_block = true;
repl_block_id = i;
}
} else {
found_free_block = true;
repl_block_id = i;
}
}
if (hit) {
//
// HIT handling
//
if (active_req.write) {
// handle write hit
auto& hit_block = set.blocks.at(hit_block_id);
if (config_.write_through) {
// forward write request to memory
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, active_req.set_id, hit_block.tag);
mem_req.write = true;
mem_req.tag = 0;
mem_req_ports_.at(bank_id).send(mem_req, 1);
} else {
// mark block as dirty
hit_block.dirty = true;
}
}
// send core response
for (auto& info : active_req.infos) {
core_rsps_.at(info.req_id).emplace(info.req_tag);
}
} else {
//
// MISS handling
//
if (!found_free_block && !config_.write_through) {
// write back dirty block
auto& repl_block = set.blocks.at(repl_block_id);
if (repl_block.dirty) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, active_req.set_id, repl_block.tag);
mem_req.write = true;
mem_req.tag = 0;
mem_req_ports_.at(bank_id).send(mem_req, 1);
}
}
if (active_req.write && config_.write_through) {
// forward write request to memory
{
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, active_req.set_id, active_req.tag);
mem_req.write = true;
mem_req.tag = 0;
mem_req_ports_.at(bank_id).send(mem_req, 1);
if (hit) {
//
// HIT handling
//
if (pipeline_req.write) {
// handle write hit
auto& hit_block = set.blocks.at(hit_block_id);
if (config_.write_through) {
// forward write request to memory
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, hit_block.tag);
mem_req.write = true;
mem_req_ports_.at(bank_id).send(mem_req, 1);
} else {
// mark block as dirty
hit_block.dirty = true;
}
}
// send core response
for (auto& info : active_req.infos) {
core_rsps_.at(info.req_id).emplace(info.req_tag);
if (!pipeline_req.write || config_.write_reponse) {
for (auto& info : pipeline_req.infos) {
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
}
}
} else {
//
// MISS handling
//
if (!found_free_block && !config_.write_through) {
// write back dirty block
auto& repl_block = set.blocks.at(repl_block_id);
if (repl_block.dirty) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, repl_block.tag);
mem_req.write = true;
mem_req_ports_.at(bank_id).send(mem_req, 1);
}
}
} else {
// lookup
int pending = bank.mshr.lookup(active_req);
// allocate MSHR
int mshr_id = bank.mshr.allocate(active_req, repl_block_id);
// send fill request
if (pending == -1) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, active_req.set_id, active_req.tag);
mem_req.write = active_req.write;
mem_req.tag = mshr_id;
mem_req_ports_.at(bank_id).send(mem_req, 1);
if (pipeline_req.write && config_.write_through) {
// forward write request to memory
{
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
mem_req.write = true;
mem_req_ports_.at(bank_id).send(mem_req, 1);
}
// send core response
if (config_.write_reponse) {
for (auto& info : pipeline_req.infos) {
simobject_->CoreRspPorts.at(info.req_id).send(MemRsp{info.req_tag}, config_.latency);
}
}
} else {
// MSHR lookup
int pending = bank.mshr.lookup(pipeline_req);
// allocate MSHR
int mshr_id = bank.mshr.allocate(pipeline_req, repl_block_id);
// send fill request
if (pending == -1) {
MemReq mem_req;
mem_req.addr = params_.mem_addr(bank_id, pipeline_req.set_id, pipeline_req.tag);
mem_req.write = pipeline_req.write;
mem_req.tag = mshr_id;
mem_req_ports_.at(bank_id).send(mem_req, 1);
}
}
}
}