performance refactoring - rebalanced stream buffers across the device to enforce the output buffering rule at component boundaries, finally resolved block RAM R/W collision discrepancies.

This commit is contained in:
Blaise Tine
2020-12-19 02:45:06 -08:00
parent 29cd2f5dff
commit 4bbd7bf408
76 changed files with 1313 additions and 1098 deletions

View File

@@ -5,6 +5,8 @@ module VX_databus_arb #(
parameter WORD_SIZE = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
@@ -43,12 +45,13 @@ module VX_databus_arb #(
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out
);
localparam DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
localparam REQ_DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
localparam RSP_DATAW = TAG_IN_WIDTH + WORD_WIDTH;
if (NUM_REQS > 1) begin
wire [NUM_REQS-1:0] valids;
wire [NUM_REQS-1:0][DATAW-1:0] data_in;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
wire [`NUM_THREADS-1:0] req_tmask_out;
wire req_valid_out_unqual;
@@ -58,34 +61,46 @@ module VX_databus_arb #(
end
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.DATAW (DATAW),
.IN_BUFFER (NUM_REQS >= 4),
.OUT_BUFFER (NUM_REQS >= 4)
.NUM_REQS (NUM_REQS),
.DATAW (REQ_DATAW),
.BUFFERED (BUFFERED_REQ)
) req_arb (
.clk (clk),
.reset (reset),
.valid_in (valids),
.data_in (data_in),
.ready_in (req_ready_in),
.valid_out (req_valid_out_unqual),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out)
.clk (clk),
.reset (reset),
.valid_in (valids),
.data_in (data_in),
.ready_in (req_ready_in),
.valid_out (req_valid_out_unqual),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out)
);
assign req_valid_out = {`NUM_THREADS{req_valid_out_unqual}} & req_tmask_out;
///////////////////////////////////////////////////////////////////////
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
for (genvar i = 0; i < NUM_REQS; i++) begin
assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == LOG_NUM_REQS'(i));
assign rsp_tag_out[i] = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
assign rsp_data_out[i] = rsp_data_in;
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
end
assign rsp_ready_in = rsp_ready_out[rsp_sel];
VX_stream_demux #(
.NUM_REQS (NUM_REQS),
.DATAW (RSP_DATAW),
.BUFFERED (BUFFERED_RSP)
) rsp_demux (
.clk (clk),
.reset (reset),
.sel (rsp_sel),
.valid_in (rsp_valid_in),
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out (rsp_merged_data_out),
.ready_out (rsp_ready_out)
);
end else begin