Refactored all arbiters to add buffering when the request count is > 2; optimized the cache core response module on the critical path when running as L2

This commit is contained in:
Blaise Tine
2020-11-08 01:31:46 -08:00
parent b14007f930
commit 10505caae1
19 changed files with 602 additions and 534 deletions

View File

@@ -14,94 +14,103 @@ module VX_io_arb #(
input wire reset,
// input requests
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in,
input wire [NUM_REQUESTS-1:0] io_req_rw_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in,
output wire [NUM_REQUESTS-1:0] io_req_ready_in,
// input response
output wire [NUM_REQUESTS-1:0] io_rsp_valid_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in,
input wire [NUM_REQUESTS-1:0] io_rsp_ready_in,
// output request
output wire [`NUM_THREADS-1:0] io_req_valid_out,
output wire [`NUM_THREADS-1:0] io_req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out,
output wire io_req_rw_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out,
output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out,
input wire io_req_ready_out,
// output response
input wire io_rsp_valid_out,
input wire [WORD_WIDTH-1:0] io_rsp_data_out,
input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out,
input wire [WORD_WIDTH-1:0] io_rsp_data_out,
output wire io_rsp_ready_out
);
if (NUM_REQUESTS == 1) begin
if (NUM_REQUESTS > 1) begin
wire [NUM_REQUESTS-1:0] valids;
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign valids[i] = (| io_req_valid_in[i]);
end
wire [REQS_BITS-1:0] req_idx;
wire [NUM_REQUESTS-1:0] req_1hot;
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) req_arb (
.clk (clk),
.reset (reset),
.requests (valids),
`UNUSED_PIN (grant_valid),
.grant_index (req_idx),
.grant_onehot (req_1hot)
);
wire stall = (| io_req_valid_out) && ~io_req_ready_out;
VX_generic_register #(
.N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({io_req_valid_in[req_idx], {io_req_tag_in[req_idx], REQS_BITS'(req_idx)}, io_req_addr_in[req_idx], io_req_rw_in[req_idx], io_req_byteen_in[req_idx], io_req_data_in[req_idx]}),
.out ({io_req_valid_out, io_req_tag_out, io_req_addr_out, io_req_rw_out, io_req_byteen_out, io_req_data_out})
);
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_req_ready_in[i] = req_1hot[i] && ~stall;
end
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = io_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_rsp_valid_in[i] = io_rsp_valid_out && (rsp_sel == REQS_BITS'(i));
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign io_rsp_data_in[i] = io_rsp_data_out;
end
assign io_rsp_ready_out = io_rsp_ready_in[rsp_sel];
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign io_req_valid_out = io_req_valid_in;
assign io_req_tag_out = io_req_tag_in;
assign io_req_addr_out = io_req_addr_in;
assign io_req_rw_out = io_req_rw_in;
assign io_req_byteen_out = io_req_byteen_in;
assign io_req_addr_out = io_req_addr_in;
assign io_req_data_out = io_req_data_in;
assign io_req_tag_out = io_req_tag_in;
assign io_req_ready_in = io_req_ready_out;
assign io_rsp_valid_in = io_rsp_valid_out;
assign io_rsp_data_in = io_rsp_data_out;
assign io_rsp_tag_in = io_rsp_tag_out;
assign io_rsp_data_in = io_rsp_data_out;
assign io_rsp_ready_out = io_rsp_ready_in;
end else begin
reg [REQS_BITS-1:0] bus_req_sel;
wire [NUM_REQUESTS-1:0] valid_requests;
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign valid_requests[i] = (| io_req_valid_in[i]);
end
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (valid_requests),
.grant_index (bus_req_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign io_req_valid_out = io_req_valid_in [bus_req_sel];
assign io_req_rw_out = io_req_rw_in [bus_req_sel];
assign io_req_byteen_out = io_req_byteen_in [bus_req_sel];
assign io_req_addr_out = io_req_addr_in [bus_req_sel];
assign io_req_data_out = io_req_data_in [bus_req_sel];
assign io_req_tag_out = {io_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)};
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_req_ready_in[i] = io_req_ready_out && (bus_req_sel == REQS_BITS'(i));
end
wire [REQS_BITS-1:0] bus_rsp_sel = io_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_rsp_valid_in[i] = io_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i));
assign io_rsp_data_in[i] = io_rsp_data_out;
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
end
assign io_rsp_ready_out = io_rsp_ready_in[bus_rsp_sel];
end
endmodule