Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
141
sim/simx/dispatcher.h
Normal file
141
sim/simx/dispatcher.h
Normal file
@@ -0,0 +1,141 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "pipeline.h"
|
||||
#include <queue>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Dispatcher : public SimObject<Dispatcher> {
|
||||
public:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Outputs;
|
||||
|
||||
Dispatcher(const SimContext& ctx, const Arch& arch, uint32_t buf_size, uint32_t block_size, uint32_t num_lanes)
|
||||
: SimObject<Dispatcher>(ctx, "Dispatcher")
|
||||
, Outputs(ISSUE_WIDTH, this)
|
||||
, Inputs_(ISSUE_WIDTH, this)
|
||||
, arch_(arch)
|
||||
, queues_(ISSUE_WIDTH, std::queue<pipeline_trace_t*>())
|
||||
, buf_size_(buf_size)
|
||||
, block_size_(block_size)
|
||||
, num_lanes_(num_lanes)
|
||||
, batch_count_(ISSUE_WIDTH / block_size)
|
||||
, pid_count_(arch.num_threads() / num_lanes)
|
||||
, batch_idx_(0)
|
||||
, start_p_(block_size, 0)
|
||||
{}
|
||||
|
||||
virtual ~Dispatcher() {}
|
||||
|
||||
virtual void reset() {
|
||||
batch_idx_ = 0;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
start_p_.at(b) = 0;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void tick() {
|
||||
for (uint32_t i = 0; i < ISSUE_WIDTH; ++i) {
|
||||
auto& queue = queues_.at(i);
|
||||
if (queue.empty())
|
||||
continue;
|
||||
auto trace = queue.front();
|
||||
Inputs_.at(i).send(trace, 1);
|
||||
queue.pop();
|
||||
}
|
||||
|
||||
uint32_t block_sent = 0;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
uint32_t i = batch_idx_ * block_size_ + b;
|
||||
auto& input = Inputs_.at(i);
|
||||
if (input.empty()) {
|
||||
++block_sent;
|
||||
continue;
|
||||
}
|
||||
auto& output = Outputs.at(i);
|
||||
auto trace = input.front();
|
||||
if (pid_count_ != 1) {
|
||||
auto start_p = start_p_.at(b);
|
||||
if (start_p == -1) {
|
||||
++block_sent;
|
||||
continue;
|
||||
}
|
||||
int start(-1), end(-1);
|
||||
for (uint32_t j = start_p * num_lanes_, n = arch_.num_threads(); j < n; ++j) {
|
||||
if (!trace->tmask.test(j))
|
||||
continue;
|
||||
if (start == -1)
|
||||
start = j;
|
||||
end = j;
|
||||
}
|
||||
start /= num_lanes_;
|
||||
end /= num_lanes_;
|
||||
auto new_trace = new pipeline_trace_t(*trace);
|
||||
new_trace->tmask.reset();
|
||||
for (int j = start * num_lanes_, n = j + num_lanes_; j < n; ++j) {
|
||||
new_trace->tmask[j] = trace->tmask[j];
|
||||
}
|
||||
new_trace->pid = start;
|
||||
new_trace->sop = (start_p == 0);
|
||||
if (start == end) {
|
||||
new_trace->eop = 1;
|
||||
start_p_.at(b) = -1;
|
||||
input.pop();
|
||||
++block_sent;
|
||||
delete trace;
|
||||
} else {
|
||||
new_trace->eop = 0;
|
||||
start_p_.at(b) = start + 1;
|
||||
}
|
||||
output.send(new_trace, 1);
|
||||
DT(3, "pipeline-dispatch: " << *new_trace);
|
||||
} else {
|
||||
trace->pid = 0;
|
||||
input.pop();
|
||||
output.send(trace, 1);
|
||||
DT(3, "pipeline-dispatch: " << *trace);
|
||||
++block_sent;
|
||||
}
|
||||
}
|
||||
if (block_sent == block_size_) {
|
||||
batch_idx_ = (batch_idx_ + 1) % batch_count_;
|
||||
for (uint32_t b = 0; b < block_size_; ++b) {
|
||||
start_p_.at(b) = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool push(uint32_t issue_index, pipeline_trace_t* trace) {
|
||||
auto& queue = queues_.at(issue_index);
|
||||
if (queue.size() >= buf_size_)
|
||||
return false;
|
||||
queue.push(trace);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<SimPort<pipeline_trace_t*>> Inputs_;
|
||||
const Arch& arch_;
|
||||
std::vector<std::queue<pipeline_trace_t*>> queues_;
|
||||
uint32_t buf_size_;
|
||||
uint32_t block_size_;
|
||||
uint32_t num_lanes_;
|
||||
uint32_t batch_count_;
|
||||
uint32_t pid_count_;
|
||||
uint32_t batch_idx_;
|
||||
std::vector<int> start_p_;
|
||||
};
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user