using onehot multiplexer to reduce critical path

This commit is contained in:
Blaise Tine
2021-07-08 00:26:59 -07:00
parent dc34c5c5bd
commit 10e9ee124b
11 changed files with 161 additions and 207 deletions

View File

@@ -188,6 +188,7 @@ module VX_bank #(
wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable;
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_pop_unqual
&& !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready

View File

@@ -93,7 +93,6 @@ module VX_nc_bypass #(
// core request handling
reg [NUM_REQS-1:0] core_req_ready_in_r;
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_tids;
@@ -115,210 +114,130 @@ module VX_nc_bypass #(
.valid_out (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
`UNUSED_VAR (core_req_nc_sel)
if (NUM_REQS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_ready_in_r[i] = ~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i];
end else begin
core_req_ready_in_r[i] = core_req_ready_out[i];
end
end
end
end else begin
`UNUSED_VAR (core_req_nc_tid)
always @(*) begin
if (core_req_valid_in_nc) begin
core_req_ready_in_r = ~mem_req_valid_in && mem_req_ready_out;
end else begin
core_req_ready_in_r = core_req_ready_out;
end
end
end
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
assign core_req_byteen_out = core_req_byteen_in;
assign core_req_data_out = core_req_data_in;
assign core_req_tag_out = core_req_tag_in;
assign core_req_ready_in = core_req_ready_in_r;
if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
end
end else begin
`UNUSED_VAR (core_req_nc_sel)
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
end
// memory request handling
reg mem_req_valid_out_r;
reg mem_req_rw_out_r;
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_out_r;
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
reg mem_req_ready_in_r;
always @(*) begin
if (mem_req_valid_in) begin
mem_req_valid_out_r = 1;
mem_req_ready_in_r = mem_req_ready_out;
end else begin
mem_req_valid_out_r = core_req_nc_valid;
mem_req_ready_in_r = 0;
end
end
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_valid_in && mem_req_ready_out;
if (NUM_REQS > 1) begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
end
end
wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
VX_onehot_mux #(
.DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1),
.COUNT (NUM_REQS)
) core_req_nc_mux (
.data_in (core_req_nc_mux_in),
.sel_in (core_req_nc_sel),
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
);
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
end
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
end
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in;
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
end
end
end else begin
`UNUSED_VAR (core_req_nc_tid)
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
end
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'(core_req_tag_in);
end
end
assign mem_req_valid_out = mem_req_valid_out_r;
assign mem_req_rw_out = mem_req_rw_out_r;
assign mem_req_addr_out = mem_req_addr_out_r;
assign mem_req_byteen_out = mem_req_byteen_out_r;
assign mem_req_data_out = mem_req_data_out_r;
assign mem_req_tag_out = mem_req_tag_out_r;
assign mem_req_ready_in = mem_req_ready_in_r;
// core response handling
reg [NUM_REQS-1:0] core_rsp_valid_out_r;
reg [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out_r;
reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r;
reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r;
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
if (NUM_REQS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] core_rsp_valid_in_r;
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
core_rsp_valid_in_r = 0;
core_rsp_valid_in_r[rsp_tid] = 1;
end
assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in;
assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out;
end else begin
assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in;
assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out;
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ?
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 1;
core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i];
end
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in;
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin
assign core_rsp_tag_out[i] = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i];
end
assign core_rsp_valid_out = core_rsp_valid_out_r;
assign core_rsp_data_out = core_rsp_data_out_r;
assign core_rsp_tag_out = core_rsp_tag_out_r;
assign core_rsp_ready_in = core_rsp_ready_in_r;
// memory response handling