dcache response bus optimization

This commit is contained in:
Blaise Tine
2021-07-12 10:14:48 -07:00
parent b99fb41d52
commit 5c40422e4f
16 changed files with 350 additions and 259 deletions

View File

@@ -49,6 +49,14 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core # using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test cache banking
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# test 128-bit MEM block # test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
@@ -66,7 +74,7 @@ CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --
CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm CONFIGS="-DVERILATOR_RESET_VALUE=1" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm
# test long memory latency # test long memory latency
CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=sgemm CONFIGS="-DMEM_LATENCY=100 -DMEM_RQ_SIZE=4 -DMEM_STALLS_MODULO=4" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test pipeline stress # test pipeline stress
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n128" ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n128"

View File

@@ -127,6 +127,7 @@ module VX_cluster #(
.core_rsp_data (per_core_mem_rsp_data), .core_rsp_data (per_core_mem_rsp_data),
.core_rsp_tag (per_core_mem_rsp_tag), .core_rsp_tag (per_core_mem_rsp_tag),
.core_rsp_ready (per_core_mem_rsp_ready), .core_rsp_ready (per_core_mem_rsp_ready),
`UNUSED_PIN (core_rsp_tmask),
// Memory request // Memory request
.mem_req_valid (mem_req_valid), .mem_req_valid (mem_req_valid),

View File

@@ -101,6 +101,7 @@ module VX_core #(
// Dcache core response // Dcache core response
.dcache_rsp_valid (dcache_core_rsp_if.valid), .dcache_rsp_valid (dcache_core_rsp_if.valid),
.dcache_rsp_tmask (dcache_core_rsp_if.tmask),
.dcache_rsp_data (dcache_core_rsp_if.data), .dcache_rsp_data (dcache_core_rsp_if.data),
.dcache_rsp_tag (dcache_core_rsp_if.tag), .dcache_rsp_tag (dcache_core_rsp_if.tag),
.dcache_rsp_ready (dcache_core_rsp_if.ready), .dcache_rsp_ready (dcache_core_rsp_if.ready),

View File

@@ -120,7 +120,7 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready; wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
wire mbuf_push = (| dcache_req_fire) wire mbuf_push = (| dcache_req_fire)
&& is_req_start // first submission only && is_req_start // first submission only
@@ -177,7 +177,7 @@ module VX_lsu_unit #(
end end
end end
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid; assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.tmask;
always @(posedge clk) begin always @(posedge clk) begin
if (mbuf_push) begin if (mbuf_push) begin
@@ -212,11 +212,12 @@ module VX_lsu_unit #(
end end
always @(*) begin always @(*) begin
mem_req_data = req_data[i];
case (req_offset[i]) case (req_offset[i])
1: mem_req_data[31:8] = req_data[i][23:0]; 1: mem_req_data[31:8] = req_data[i][23:0];
2: mem_req_data[31:16] = req_data[i][15:0]; 2: mem_req_data[31:16] = req_data[i][15:0];
3: mem_req_data[31:24] = req_data[i][7:0]; 3: mem_req_data[31:24] = req_data[i][7:0];
default: mem_req_data = req_data[i]; default:;
endcase endcase
end end
@@ -269,7 +270,7 @@ module VX_lsu_unit #(
end end
end end
assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.valid; assign rsp_tmask_qual = rsp_is_dup ? rsp_tmask : dcache_rsp_if.tmask;
// send load commit // send load commit
@@ -282,8 +283,8 @@ module VX_lsu_unit #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (!load_rsp_stall), .enable (!load_rsp_stall),
.data_in ({(| dcache_rsp_if.valid), rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}), .data_in ({dcache_rsp_if.valid, rsp_wid, rsp_tmask_qual, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop}) .data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
); );
// Can accept new cache response? // Can accept new cache response?
@@ -298,7 +299,7 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen); `SCOPE_ASSIGN (dcache_req_byteen,dcache_req_if.byteen);
`SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data); `SCOPE_ASSIGN (dcache_req_data, dcache_req_if.data);
`SCOPE_ASSIGN (dcache_req_tag, req_tag); `SCOPE_ASSIGN (dcache_req_tag, req_tag);
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}}); `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.tmask & {`NUM_THREADS{dcache_rsp_fire}});
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data); `SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr); `SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
@@ -339,8 +340,8 @@ module VX_lsu_unit #(
end end
end end
if (dcache_rsp_fire) begin if (dcache_rsp_fire) begin
$write("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=", $write("%t: D$%0d Rsp: tmask=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd); $time, CORE_ID, dcache_rsp_if.tmask, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd);
`PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS); `PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
$write(", is_dup=%b\n", rsp_is_dup); $write(", is_dup=%b\n", rsp_is_dup);
end end

View File

@@ -107,6 +107,7 @@ module VX_mem_unit # (
.core_rsp_data (icache_core_rsp_if.data), .core_rsp_data (icache_core_rsp_if.data),
.core_rsp_tag (icache_core_rsp_if.tag), .core_rsp_tag (icache_core_rsp_if.tag),
.core_rsp_ready (icache_core_rsp_if.ready), .core_rsp_ready (icache_core_rsp_if.ready),
`UNUSED_PIN (core_rsp_tmask),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_cache_if (perf_icache_if), .perf_cache_if (perf_icache_if),
@@ -162,6 +163,7 @@ module VX_mem_unit # (
// Core response // Core response
.core_rsp_valid (dcache_rsp_if.valid), .core_rsp_valid (dcache_rsp_if.valid),
.core_rsp_tmask (dcache_rsp_if.tmask),
.core_rsp_data (dcache_rsp_if.data), .core_rsp_data (dcache_rsp_if.data),
.core_rsp_tag (dcache_rsp_if.tag), .core_rsp_tag (dcache_rsp_if.tag),
.core_rsp_ready (dcache_rsp_if.ready), .core_rsp_ready (dcache_rsp_if.ready),
@@ -241,6 +243,7 @@ module VX_mem_unit # (
// Core response // Core response
.core_rsp_valid (smem_rsp_if.valid), .core_rsp_valid (smem_rsp_if.valid),
.core_rsp_tmask (smem_rsp_if.tmask),
.core_rsp_data (smem_rsp_if.data), .core_rsp_data (smem_rsp_if.data),
.core_rsp_tag (smem_rsp_if.tag), .core_rsp_tag (smem_rsp_if.tag),
.core_rsp_ready (smem_rsp_if.ready) .core_rsp_ready (smem_rsp_if.ready)

View File

@@ -19,7 +19,8 @@ module VX_pipeline #(
input wire [`NUM_THREADS-1:0] dcache_req_ready, input wire [`NUM_THREADS-1:0] dcache_req_ready,
// Dcache core response // Dcache core response
input wire [`NUM_THREADS-1:0] dcache_rsp_valid, input wire dcache_rsp_valid,
input wire [`NUM_THREADS-1:0] dcache_rsp_tmask,
input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data, input wire [`NUM_THREADS-1:0][31:0] dcache_rsp_data,
input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag, input wire [`DCORE_TAG_WIDTH-1:0] dcache_rsp_tag,
output wire dcache_rsp_ready, output wire dcache_rsp_ready,
@@ -72,6 +73,7 @@ module VX_pipeline #(
) dcache_core_rsp_if(); ) dcache_core_rsp_if();
assign dcache_core_rsp_if.valid = dcache_rsp_valid; assign dcache_core_rsp_if.valid = dcache_rsp_valid;
assign dcache_core_rsp_if.tmask = dcache_rsp_tmask;
assign dcache_core_rsp_if.data = dcache_rsp_data; assign dcache_core_rsp_if.data = dcache_rsp_data;
assign dcache_core_rsp_if.tag = dcache_rsp_tag; assign dcache_core_rsp_if.tag = dcache_rsp_tag;
assign dcache_rsp_ready = dcache_core_rsp_if.ready; assign dcache_rsp_ready = dcache_core_rsp_if.ready;
@@ -130,12 +132,21 @@ module VX_pipeline #(
VX_perf_pipeline_if perf_pipeline_if(); VX_perf_pipeline_if perf_pipeline_if();
`endif `endif
wire fetch_reset, decode_reset, issue_reset, execute_reset, commit_reset;
VX_reset_relay #(
.NUM_NODES (5)
) reset_relay (
.clk (clk),
.reset (reset),
.reset_o ({fetch_reset, decode_reset, issue_reset, execute_reset, commit_reset})
);
VX_fetch #( VX_fetch #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) fetch ( ) fetch (
`SCOPE_BIND_VX_pipeline_fetch `SCOPE_BIND_VX_pipeline_fetch
.clk (clk), .clk (clk),
.reset (reset), .reset (fetch_reset),
.icache_req_if (icache_core_req_if), .icache_req_if (icache_core_req_if),
.icache_rsp_if (icache_core_rsp_if), .icache_rsp_if (icache_core_rsp_if),
.wstall_if (wstall_if), .wstall_if (wstall_if),
@@ -150,7 +161,7 @@ module VX_pipeline #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) decode ( ) decode (
.clk (clk), .clk (clk),
.reset (reset), .reset (decode_reset),
.ifetch_rsp_if (ifetch_rsp_if), .ifetch_rsp_if (ifetch_rsp_if),
.decode_if (decode_if), .decode_if (decode_if),
.wstall_if (wstall_if), .wstall_if (wstall_if),
@@ -163,7 +174,7 @@ module VX_pipeline #(
`SCOPE_BIND_VX_pipeline_issue `SCOPE_BIND_VX_pipeline_issue
.clk (clk), .clk (clk),
.reset (reset), .reset (issue_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_pipeline_if (perf_pipeline_if), .perf_pipeline_if (perf_pipeline_if),
@@ -185,7 +196,7 @@ module VX_pipeline #(
`SCOPE_BIND_VX_pipeline_execute `SCOPE_BIND_VX_pipeline_execute
.clk (clk), .clk (clk),
.reset (reset), .reset (execute_reset),
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.perf_memsys_if (perf_memsys_if), .perf_memsys_if (perf_memsys_if),
@@ -219,7 +230,7 @@ module VX_pipeline #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) commit ( ) commit (
.clk (clk), .clk (clk),
.reset (reset), .reset (commit_reset),
.alu_commit_if (alu_commit_if), .alu_commit_if (alu_commit_if),
.ld_commit_if (ld_commit_if), .ld_commit_if (ld_commit_if),

View File

@@ -53,32 +53,21 @@ module VX_smem_arb (
// handle responses // handle responses
// //
wire [1:0] rsp_valid_in;
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
wire core_rsp_valid;
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid);
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}};
VX_stream_arbiter #( VX_stream_arbiter #(
.NUM_REQS (2), .NUM_REQS (2),
.DATAW (RSP_DATAW), .DATAW (RSP_DATAW),
.TYPE ("X"),
.BUFFERED (1) .BUFFERED (1)
) rsp_arb ( ) rsp_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (rsp_valid_in), .valid_in ({smem_rsp_if.valid, cache_rsp_if.valid}),
.data_in (rsp_data_in), .data_in ({{smem_rsp_if.tmask, smem_rsp_if.data, {smem_rsp_if.tag, 1'b1}},
{cache_rsp_if.tmask, cache_rsp_if.data, {cache_rsp_if.tag, 1'b0}}}),
.ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}), .ready_in ({smem_rsp_if.ready, cache_rsp_if.ready}),
.valid_out (core_rsp_valid), .valid_out (core_rsp_if.valid),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}), .data_out ({core_rsp_if.tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready) .ready_out (core_rsp_if.ready)
); );
assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_tmask;
endmodule endmodule

View File

@@ -125,6 +125,7 @@ module Vortex (
.core_rsp_data (per_cluster_mem_rsp_data), .core_rsp_data (per_cluster_mem_rsp_data),
.core_rsp_tag (per_cluster_mem_rsp_tag), .core_rsp_tag (per_cluster_mem_rsp_tag),
.core_rsp_ready (per_cluster_mem_rsp_ready), .core_rsp_ready (per_cluster_mem_rsp_ready),
`UNUSED_PIN (core_rsp_tmask),
// Memory request // Memory request
.mem_req_valid (mem_req_valid), .mem_req_valid (mem_req_valid),

View File

@@ -64,10 +64,11 @@ module VX_cache #(
output wire [NUM_REQS-1:0] core_req_ready, output wire [NUM_REQS-1:0] core_req_ready,
// Core response // Core response
output wire [NUM_REQS-1:0] core_rsp_valid, output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready, input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready,
// Memory request // Memory request
output wire mem_req_valid, output wire mem_req_valid,
@@ -86,6 +87,7 @@ module VX_cache #(
); );
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
@@ -97,39 +99,40 @@ module VX_cache #(
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Core request // Core request
wire [NUM_REQS-1:0] core_req_valid_out; wire [NUM_REQS-1:0] core_req_valid_nc;
wire [NUM_REQS-1:0] core_req_rw_out; wire [NUM_REQS-1:0] core_req_rw_nc;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_out; wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_nc;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_out; wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_nc;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_out; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_nc;
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_out; wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_nc;
wire [NUM_REQS-1:0] core_req_ready_out; wire [NUM_REQS-1:0] core_req_ready_nc;
// Core response // Core response
wire [NUM_REQS-1:0] core_rsp_valid_in; wire [`CORE_RSP_TAGS-1:0] core_rsp_valid_nc;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in; wire [NUM_REQS-1:0] core_rsp_tmask_nc;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_nc;
wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready_in; wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_nc;
wire [`CORE_RSP_TAGS-1:0] core_rsp_ready_nc;
// Memory request // Memory request
wire mem_req_valid_in; wire mem_req_valid_nc;
wire mem_req_rw_in; wire mem_req_rw_nc;
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_in; wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_in; wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_in; wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_nc;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_in; wire [MEM_TAG_WIDTH-1:0] mem_req_tag_nc;
wire mem_req_ready_in; wire mem_req_ready_nc;
// Memory response // Memory response
wire mem_rsp_valid_out; wire mem_rsp_valid_nc;
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_out; wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_nc;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_out; wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_nc;
wire mem_rsp_ready_out; wire mem_rsp_ready_nc;
if (NC_ENABLE) begin if (NC_ENABLE) begin
VX_nc_bypass #( VX_nc_bypass #(
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS),
.NUM_RSP_TAGS (`CORE_REQ_TAG_COUNT), .NUM_RSP_TAGS (`CORE_RSP_TAGS),
.NC_TAG_BIT (0), .NC_TAG_BIT (0),
.CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH), .CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH),
@@ -153,34 +156,36 @@ module VX_cache #(
.core_req_ready_in (core_req_ready), .core_req_ready_in (core_req_ready),
// Core request out // Core request out
.core_req_valid_out (core_req_valid_out), .core_req_valid_out (core_req_valid_nc),
.core_req_rw_out (core_req_rw_out), .core_req_rw_out (core_req_rw_nc),
.core_req_byteen_out(core_req_byteen_out), .core_req_byteen_out(core_req_byteen_nc),
.core_req_addr_out (core_req_addr_out), .core_req_addr_out (core_req_addr_nc),
.core_req_data_out (core_req_data_out), .core_req_data_out (core_req_data_nc),
.core_req_tag_out (core_req_tag_out), .core_req_tag_out (core_req_tag_nc),
.core_req_ready_out (core_req_ready_out), .core_req_ready_out (core_req_ready_nc),
// Core response in // Core response in
.core_rsp_valid_in (core_rsp_valid_in), .core_rsp_valid_in (core_rsp_valid_nc),
.core_rsp_data_in (core_rsp_data_in), .core_rsp_tmask_in (core_rsp_tmask_nc),
.core_rsp_tag_in (core_rsp_tag_in), .core_rsp_data_in (core_rsp_data_nc),
.core_rsp_ready_in (core_rsp_ready_in), .core_rsp_tag_in (core_rsp_tag_nc),
.core_rsp_ready_in (core_rsp_ready_nc),
// Core response out // Core response out
.core_rsp_valid_out (core_rsp_valid), .core_rsp_valid_out (core_rsp_valid),
.core_rsp_tmask_out (core_rsp_tmask),
.core_rsp_data_out (core_rsp_data), .core_rsp_data_out (core_rsp_data),
.core_rsp_tag_out (core_rsp_tag), .core_rsp_tag_out (core_rsp_tag),
.core_rsp_ready_out (core_rsp_ready), .core_rsp_ready_out (core_rsp_ready),
// Memory request in // Memory request in
.mem_req_valid_in (mem_req_valid_in), .mem_req_valid_in (mem_req_valid_nc),
.mem_req_rw_in (mem_req_rw_in), .mem_req_rw_in (mem_req_rw_nc),
.mem_req_byteen_in (mem_req_byteen_in), .mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_addr_in (mem_req_addr_in), .mem_req_addr_in (mem_req_addr_nc),
.mem_req_data_in (mem_req_data_in), .mem_req_data_in (mem_req_data_nc),
.mem_req_tag_in (mem_req_tag_in), .mem_req_tag_in (mem_req_tag_nc),
.mem_req_ready_in (mem_req_ready_in), .mem_req_ready_in (mem_req_ready_nc),
// Memory request out // Memory request out
.mem_req_valid_out (mem_req_valid), .mem_req_valid_out (mem_req_valid),
@@ -198,52 +203,53 @@ module VX_cache #(
.mem_rsp_ready_in (mem_rsp_ready), .mem_rsp_ready_in (mem_rsp_ready),
// Memory response out // Memory response out
.mem_rsp_valid_out (mem_rsp_valid_out), .mem_rsp_valid_out (mem_rsp_valid_nc),
.mem_rsp_data_out (mem_rsp_data_out), .mem_rsp_data_out (mem_rsp_data_nc),
.mem_rsp_tag_out (mem_rsp_tag_out), .mem_rsp_tag_out (mem_rsp_tag_nc),
.mem_rsp_ready_out (mem_rsp_ready_out) .mem_rsp_ready_out (mem_rsp_ready_nc)
); );
end else begin end else begin
assign core_req_valid_out = core_req_valid; assign core_req_valid_nc = core_req_valid;
assign core_req_rw_out = core_req_rw; assign core_req_rw_nc = core_req_rw;
assign core_req_addr_out = core_req_addr; assign core_req_addr_nc = core_req_addr;
assign core_req_byteen_out = core_req_byteen; assign core_req_byteen_nc = core_req_byteen;
assign core_req_data_out = core_req_data; assign core_req_data_nc = core_req_data;
assign core_req_tag_out = core_req_tag; assign core_req_tag_nc = core_req_tag;
assign core_req_ready = core_req_ready_out; assign core_req_ready = core_req_ready_nc;
assign core_rsp_valid = core_rsp_valid_in; assign core_rsp_valid = core_rsp_valid_nc;
assign core_rsp_data = core_rsp_data_in; assign core_rsp_tmask = core_rsp_tmask_nc;
assign core_rsp_tag = core_rsp_tag_in; assign core_rsp_data = core_rsp_data_nc;
assign core_rsp_ready_in = core_rsp_ready; assign core_rsp_tag = core_rsp_tag_nc;
assign core_rsp_ready_nc = core_rsp_ready;
assign mem_req_valid = mem_req_valid_in; assign mem_req_valid = mem_req_valid_nc;
assign mem_req_rw = mem_req_rw_in; assign mem_req_rw = mem_req_rw_nc;
assign mem_req_addr = mem_req_addr_in; assign mem_req_addr = mem_req_addr_nc;
assign mem_req_byteen = mem_req_byteen_in; assign mem_req_byteen = mem_req_byteen_nc;
assign mem_req_data = mem_req_data_in; assign mem_req_data = mem_req_data_nc;
assign mem_req_tag = mem_req_tag_in; assign mem_req_tag = mem_req_tag_nc;
assign mem_req_ready_in = mem_req_ready; assign mem_req_ready_nc = mem_req_ready;
assign mem_rsp_valid_out = mem_rsp_valid; assign mem_rsp_valid_nc = mem_rsp_valid;
assign mem_rsp_data_out = mem_rsp_data; assign mem_rsp_data_nc = mem_rsp_data;
assign mem_rsp_tag_out = mem_rsp_tag; assign mem_rsp_tag_nc = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_ready_out; assign mem_rsp_ready = mem_rsp_ready_nc;
end end
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual; wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_out_a, mem_rsp_tag_qual; wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc_a, mem_rsp_tag_qual;
wire mrsq_full, mrsq_empty; wire mrsq_full, mrsq_empty;
wire mrsq_push, mrsq_pop; wire mrsq_push, mrsq_pop;
assign mrsq_push = mem_rsp_valid_out && mem_rsp_ready_out; assign mrsq_push = mem_rsp_valid_nc && mem_rsp_ready_nc;
assign mem_rsp_ready_out = !mrsq_full; assign mem_rsp_ready_nc = !mrsq_full;
// trim out shared memory and non-cacheable flags // trim out shared memory and non-cacheable flags
assign mem_rsp_tag_out_a = mem_rsp_tag_out[NC_ENABLE +: `MEM_ADDR_WIDTH]; assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH];
VX_fifo_queue #( VX_fifo_queue #(
.DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH), .DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH),
@@ -254,7 +260,7 @@ module VX_cache #(
.reset (reset), .reset (reset),
.push (mrsq_push), .push (mrsq_push),
.pop (mrsq_pop), .pop (mrsq_pop),
.data_in ({mem_rsp_tag_out_a, mem_rsp_data_out}), .data_in ({mem_rsp_tag_nc_a, mem_rsp_data_nc}),
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}), .data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
.empty (mrsq_empty), .empty (mrsq_empty),
.full (mrsq_full), .full (mrsq_full),
@@ -263,7 +269,7 @@ module VX_cache #(
`UNUSED_PIN (size) `UNUSED_PIN (size)
); );
`UNUSED_VAR (mem_rsp_tag_out) `UNUSED_VAR (mem_rsp_tag_nc)
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -316,7 +322,7 @@ module VX_cache #(
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)]; assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
end end
VX_cache_core_req_bank_sel #( VX_core_req_bank_sel #(
.CACHE_ID (CACHE_ID), .CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE), .CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@@ -331,13 +337,13 @@ module VX_cache #(
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.bank_stalls(perf_cache_if.bank_stalls), .bank_stalls(perf_cache_if.bank_stalls),
`endif `endif
.core_req_valid (core_req_valid_out), .core_req_valid (core_req_valid_nc),
.core_req_rw (core_req_rw_out), .core_req_rw (core_req_rw_nc),
.core_req_addr (core_req_addr_out), .core_req_addr (core_req_addr_nc),
.core_req_byteen(core_req_byteen_out), .core_req_byteen (core_req_byteen_nc),
.core_req_data (core_req_data_out), .core_req_data (core_req_data_nc),
.core_req_tag (core_req_tag_out), .core_req_tag (core_req_tag_nc),
.core_req_ready (core_req_ready_out), .core_req_ready (core_req_ready_nc),
.per_bank_core_req_valid (per_bank_core_req_valid), .per_bank_core_req_valid (per_bank_core_req_valid),
.per_bank_core_req_rw (per_bank_core_req_rw), .per_bank_core_req_rw (per_bank_core_req_rw),
.per_bank_core_req_addr (per_bank_core_req_addr), .per_bank_core_req_addr (per_bank_core_req_addr),
@@ -491,7 +497,7 @@ module VX_cache #(
); );
end end
VX_cache_core_rsp_merge #( VX_core_rsp_merge #(
.CACHE_ID (CACHE_ID), .CACHE_ID (CACHE_ID),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS), .NUM_PORTS (NUM_PORTS),
@@ -508,10 +514,11 @@ module VX_cache #(
.per_bank_core_rsp_tag (per_bank_core_rsp_tag), .per_bank_core_rsp_tag (per_bank_core_rsp_tag),
.per_bank_core_rsp_tid (per_bank_core_rsp_tid), .per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.per_bank_core_rsp_ready (per_bank_core_rsp_ready), .per_bank_core_rsp_ready (per_bank_core_rsp_ready),
.core_rsp_valid (core_rsp_valid_in), .core_rsp_valid (core_rsp_valid_nc),
.core_rsp_tag (core_rsp_tag_in), .core_rsp_tmask (core_rsp_tmask_nc),
.core_rsp_data (core_rsp_data_in), .core_rsp_tag (core_rsp_tag_nc),
.core_rsp_ready (core_rsp_ready_in) .core_rsp_data (core_rsp_data_nc),
.core_rsp_ready (core_rsp_ready_nc)
); );
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in; wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
@@ -529,16 +536,16 @@ module VX_cache #(
.valid_in (per_bank_mem_req_valid), .valid_in (per_bank_mem_req_valid),
.data_in (data_in), .data_in (data_in),
.ready_in (per_bank_mem_req_ready), .ready_in (per_bank_mem_req_ready),
.valid_out (mem_req_valid_in), .valid_out (mem_req_valid_nc),
.data_out ({mem_req_addr_in, mem_req_rw_in, mem_req_byteen_in, mem_req_data_in}), .data_out ({mem_req_addr_nc, mem_req_rw_nc, mem_req_byteen_nc, mem_req_data_nc}),
.ready_out (mem_req_ready_in) .ready_out (mem_req_ready_nc)
); );
// build memory tag adding non-cacheable flag // build memory tag adding non-cacheable flag
if (NC_ENABLE) begin if (NC_ENABLE) begin
assign mem_req_tag_in = MEM_TAG_WIDTH'({mem_req_addr_in, 1'b0}); assign mem_req_tag_nc = MEM_TAG_WIDTH'({mem_req_addr_nc, 1'b0});
end else begin end else begin
assign mem_req_tag_in = MEM_TAG_WIDTH'(mem_req_addr_in); assign mem_req_tag_nc = MEM_TAG_WIDTH'(mem_req_addr_nc);
end end
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@@ -551,7 +558,7 @@ module VX_cache #(
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}}); assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
end else begin end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
end end

View File

@@ -53,7 +53,7 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define CORE_REQ_TAG_COUNT ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS) `define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
`define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS) `define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS)

View File

@@ -1,6 +1,6 @@
`include "VX_cache_define.vh" `include "VX_cache_define.vh"
module VX_cache_core_req_bank_sel #( module VX_core_req_bank_sel #(
parameter CACHE_ID = 0, parameter CACHE_ID = 0,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
@@ -24,7 +24,7 @@ module VX_cache_core_req_bank_sel #(
input wire reset, input wire reset,
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
output wire [`PERF_CTR_BITS-1:0] bank_stalls, output wire [`PERF_CTR_BITS-1:0] bank_stalls,
`endif `endif
input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_valid,
@@ -46,7 +46,8 @@ module VX_cache_core_req_bank_sel #(
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
); );
`UNUSED_PARAM (CACHE_ID) `UNUSED_PARAM (CACHE_ID)
`STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks")); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
@@ -72,21 +73,19 @@ module VX_cache_core_req_bank_sel #(
end end
end end
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_REQS-1:0] core_req_ready_r;
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
reg [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid_r; if (NUM_PORTS > 1) begin
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r;
reg [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r;
reg [NUM_BANKS-1:0] per_bank_core_req_rw_r;
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r;
reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r;
reg [NUM_REQS-1:0] core_req_ready_r;
if (NUM_PORTS > 1) begin
reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_line_addr_r; reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_line_addr_r;
wire [NUM_REQS-1:0] core_req_line_match; wire [NUM_REQS-1:0] core_req_line_match;
@@ -201,7 +200,7 @@ module VX_cache_core_req_bank_sel #(
end end
end end
end else begin end else begin
always @(*) begin always @(*) begin
per_bank_core_req_valid_r = 0; per_bank_core_req_valid_r = 0;
@@ -227,14 +226,28 @@ module VX_cache_core_req_bank_sel #(
end end
end end
if (SHARED_BANK_READY == 0) begin if (NUM_BANKS > 1) begin
always @(*) begin if (SHARED_BANK_READY == 0) begin
core_req_ready_r = 0; always @(*) begin
for (integer j = 0; j < NUM_BANKS; ++j) begin core_req_ready_r = 0;
for (integer i = 0; i < NUM_REQS; ++i) begin for (integer j = 0; j < NUM_BANKS; ++j) begin
if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin for (integer i = 0; i < NUM_REQS; ++i) begin
core_req_ready_r[i] = per_bank_core_req_ready[j]; if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin
break; core_req_ready_r[i] = per_bank_core_req_ready[j];
break;
end
end
end
end
end else begin
always @(*) begin
core_req_ready_r = 0;
for (integer j = 0; j < NUM_BANKS; ++j) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin
core_req_ready_r[i] = per_bank_core_req_ready;
break;
end
end end
end end
end end
@@ -242,42 +255,64 @@ module VX_cache_core_req_bank_sel #(
end else begin end else begin
always @(*) begin always @(*) begin
core_req_ready_r = 0; core_req_ready_r = 0;
for (integer j = 0; j < NUM_BANKS; ++j) begin for (integer i = 0; i < NUM_REQS; ++i) begin
for (integer i = 0; i < NUM_REQS; ++i) begin if (core_req_valid[i]) begin
if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin core_req_ready_r[i] = per_bank_core_req_ready;
core_req_ready_r[i] = per_bank_core_req_ready; break;
break;
end
end end
end end
end end
end end
end end
assign per_bank_core_req_valid = per_bank_core_req_valid_r;
assign per_bank_core_req_rw = per_bank_core_req_rw_r;
assign per_bank_core_req_addr = per_bank_core_req_addr_r;
assign per_bank_core_req_wsel = per_bank_core_req_wsel_r;
assign per_bank_core_req_byteen = per_bank_core_req_byteen_r;
assign per_bank_core_req_data = per_bank_core_req_data_r;
assign per_bank_core_req_tag = per_bank_core_req_tag_r;
assign per_bank_core_req_tid = per_bank_core_req_tid_r;
assign core_req_ready = core_req_ready_r;
end else begin end else begin
`UNUSED_VAR (core_req_bid) if (NUM_BANKS > 1) begin
always @(*) begin
per_bank_core_req_valid_r = 0;
per_bank_core_req_rw_r = 'x;
per_bank_core_req_addr_r = 'x;
per_bank_core_req_wsel_r = 'x;
per_bank_core_req_byteen_r= 'x;
per_bank_core_req_data_r = 'x;
per_bank_core_req_tag_r = 'x;
per_bank_core_req_tid_r = 'x;
per_bank_core_req_valid_r[core_req_bid[0]] = core_req_valid;
per_bank_core_req_rw_r[core_req_bid[0]] = core_req_rw;
per_bank_core_req_addr_r[core_req_bid[0]] = core_req_line_addr;
per_bank_core_req_wsel_r[core_req_bid[0]] = core_req_wsel;
per_bank_core_req_byteen_r[core_req_bid[0]] = core_req_byteen;
per_bank_core_req_data_r[core_req_bid[0]] = core_req_data;
per_bank_core_req_tag_r[core_req_bid[0]] = core_req_tag;
per_bank_core_req_tid_r[core_req_bid[0]] = 0;
core_req_ready_r = per_bank_core_req_ready[core_req_bid[0]];
end
end else begin
`UNUSED_VAR (core_req_bid)
always @(*) begin
per_bank_core_req_valid_r = core_req_valid;
per_bank_core_req_rw_r = core_req_rw;
per_bank_core_req_addr_r = core_req_line_addr;
per_bank_core_req_wsel_r = core_req_wsel;
per_bank_core_req_byteen_r = core_req_byteen;
per_bank_core_req_data_r = core_req_data;
per_bank_core_req_tag_r = core_req_tag;
per_bank_core_req_tid_r = 0;
core_req_ready_r = per_bank_core_req_ready;
end
end
assign per_bank_core_req_valid = core_req_valid; end
assign per_bank_core_req_rw = core_req_rw;
assign per_bank_core_req_addr = core_req_line_addr; assign per_bank_core_req_valid = per_bank_core_req_valid_r;
assign per_bank_core_req_wsel = core_req_wsel; assign per_bank_core_req_rw = per_bank_core_req_rw_r;
assign per_bank_core_req_byteen = core_req_byteen; assign per_bank_core_req_addr = per_bank_core_req_addr_r;
assign per_bank_core_req_data = core_req_data; assign per_bank_core_req_wsel = per_bank_core_req_wsel_r;
assign per_bank_core_req_tag = core_req_tag; assign per_bank_core_req_byteen = per_bank_core_req_byteen_r;
assign per_bank_core_req_tid = 0; assign per_bank_core_req_data = per_bank_core_req_data_r;
assign core_req_ready = per_bank_core_req_ready; assign per_bank_core_req_tag = per_bank_core_req_tag_r;
end assign per_bank_core_req_tid = per_bank_core_req_tid_r;
assign core_req_ready = core_req_ready_r;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [NUM_REQS-1:0] core_req_sel_r; reg [NUM_REQS-1:0] core_req_sel_r;

View File

@@ -1,6 +1,6 @@
`include "VX_cache_define.vh" `include "VX_cache_define.vh"
module VX_cache_core_rsp_merge #( module VX_core_rsp_merge #(
parameter CACHE_ID = 0, parameter CACHE_ID = 0,
// Number of Word requests per cycle // Number of Word requests per cycle
@@ -28,10 +28,11 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Response // Core Response
output wire [NUM_REQS-1:0] core_rsp_valid, output wire [`CORE_RSP_TAGS-1:0] core_rsp_valid,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready input wire [`CORE_RSP_TAGS-1:0] core_rsp_ready
); );
`UNUSED_PARAM (CACHE_ID) `UNUSED_PARAM (CACHE_ID)
@@ -100,9 +101,6 @@ module VX_cache_core_rsp_merge #(
end end
wire core_rsp_valid_out;
wire [NUM_REQS-1:0] core_rsp_valid_out_mask;
wire core_rsp_valid_any = (| per_bank_core_rsp_valid); wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #( VX_skid_buffer #(
@@ -113,13 +111,11 @@ module VX_cache_core_rsp_merge #(
.valid_in (core_rsp_valid_any), .valid_in (core_rsp_valid_any),
.data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}), .data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}),
.ready_in (core_rsp_ready_unqual), .ready_in (core_rsp_ready_unqual),
.valid_out (core_rsp_valid_out), .valid_out (core_rsp_valid),
.data_out ({core_rsp_valid_out_mask, core_rsp_tag, core_rsp_data}), .data_out ({core_rsp_tmask, core_rsp_tag, core_rsp_data}),
.ready_out (core_rsp_ready) .ready_out (core_rsp_ready)
); );
assign core_rsp_valid = {NUM_REQS{core_rsp_valid_out}} & core_rsp_valid_out_mask;
end else begin end else begin
`UNUSED_VAR (per_bank_core_rsp_pmask) `UNUSED_VAR (per_bank_core_rsp_pmask)
@@ -167,6 +163,8 @@ module VX_cache_core_rsp_merge #(
); );
end end
assign core_rsp_tmask = core_rsp_valid;
end end
for (genvar i = 0; i < NUM_BANKS; i++) begin for (genvar i = 0; i < NUM_BANKS; i++) begin
@@ -181,38 +179,48 @@ module VX_cache_core_rsp_merge #(
if (NUM_REQS > 1) begin if (NUM_REQS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual; reg [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
reg [NUM_REQS-1:0] core_rsp_tmask_unqual;
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_tmask_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag; core_rsp_tmask_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_data_unqual = 'x;
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; core_rsp_tag_unqual = per_bank_core_rsp_tag;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end end
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = core_rsp_tmask_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready; assign per_bank_core_rsp_ready = core_rsp_ready;
end else begin end else begin
reg [`CORE_RSP_TAGS-1:0] core_rsp_valid_unqual;
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid; core_rsp_valid_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_valid;
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data; core_rsp_tag_unqual = 'x;
core_rsp_tag_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_tag;
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid] = per_bank_core_rsp_data;
end end
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tmask = core_rsp_valid_unqual;
assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid]; assign per_bank_core_rsp_ready = core_rsp_ready[per_bank_core_rsp_tid];
end end
assign core_rsp_valid = core_rsp_valid_unqual;
assign core_rsp_tag = core_rsp_tag_unqual; assign core_rsp_tag = core_rsp_tag_unqual;
assign core_rsp_data = core_rsp_data_unqual; assign core_rsp_data = core_rsp_data_unqual;
@@ -220,6 +228,7 @@ module VX_cache_core_rsp_merge #(
`UNUSED_VAR(per_bank_core_rsp_tid) `UNUSED_VAR(per_bank_core_rsp_tid)
assign core_rsp_valid = per_bank_core_rsp_valid; assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tmask = per_bank_core_rsp_valid;
assign core_rsp_tag = per_bank_core_rsp_tag; assign core_rsp_tag = per_bank_core_rsp_tag;
assign core_rsp_data = per_bank_core_rsp_data; assign core_rsp_data = per_bank_core_rsp_data;
assign per_bank_core_rsp_ready = core_rsp_ready; assign per_bank_core_rsp_ready = core_rsp_ready;

View File

@@ -38,13 +38,15 @@ module VX_nc_bypass #(
input wire [NUM_REQS-1:0] core_req_ready_out, input wire [NUM_REQS-1:0] core_req_ready_out,
// Core response in // Core response in
input wire [NUM_REQS-1:0] core_rsp_valid_in, input wire [NUM_RSP_TAGS-1:0] core_rsp_valid_in,
input wire [NUM_REQS-1:0] core_rsp_tmask_in,
input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in, input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_in,
input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in, input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in,
output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in, output wire [NUM_RSP_TAGS-1:0] core_rsp_ready_in,
// Core response out // Core response out
output wire [NUM_REQS-1:0] core_rsp_valid_out, output wire [NUM_RSP_TAGS-1:0] core_rsp_valid_out,
output wire [NUM_REQS-1:0] core_rsp_tmask_out,
output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out, output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out, output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out,
input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out, input wire [NUM_RSP_TAGS-1:0] core_rsp_ready_out,
@@ -209,39 +211,68 @@ module VX_nc_bypass #(
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT]; wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
if (NUM_REQS > 1) begin if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] core_rsp_valid_in_r; reg [NUM_REQS-1:0] rsp_nc_valid_r;
always @(*) begin always @(*) begin
core_rsp_valid_in_r = 0; rsp_nc_valid_r = 0;
core_rsp_valid_in_r[rsp_tid] = 1; rsp_nc_valid_r[rsp_tid] = is_mem_rsp_nc;
end end
assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in;
assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out;
end else begin
assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in;
assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out;
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ?
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i];
end
end
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin assign core_rsp_valid_out = core_rsp_valid_in | rsp_nc_valid_r;
assign core_rsp_tag_out[i] = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i]; assign core_rsp_tmask_out = core_rsp_tmask_in;
assign core_rsp_ready_in = core_rsp_ready_out & ~rsp_nc_valid_r;
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = rsp_nc_valid_r[i] ?
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = rsp_nc_valid_r[i] ? mem_rsp_data_in : core_rsp_data_in[i];
end
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_tag_out[i] = rsp_nc_valid_r[i] ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i];
end
end else begin
assign core_rsp_valid_out = core_rsp_valid_in || is_mem_rsp_nc;
assign core_rsp_tag_out = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in;
assign core_rsp_ready_in = core_rsp_ready_out && ~is_mem_rsp_nc;
if (NUM_REQS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] core_rsp_tmask_in_r;
always @(*) begin
core_rsp_tmask_in_r = 0;
core_rsp_tmask_in_r[rsp_tid] = 1;
end
assign core_rsp_tmask_out = is_mem_rsp_nc ? core_rsp_tmask_in_r : core_rsp_tmask_in;
end else begin
assign core_rsp_tmask_out = core_rsp_valid_out;
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ?
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
end
end else begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i];
end
end
end end
// memory response handling // memory response handling
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT]; assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
assign mem_rsp_data_out = mem_rsp_data_in;
assign mem_rsp_tag_out = mem_rsp_tag_in;
if (NUM_RSP_TAGS > 1) begin if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW]; wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
@@ -250,7 +281,4 @@ module VX_nc_bypass #(
assign mem_rsp_ready_in = is_mem_rsp_nc ? core_rsp_ready_out : mem_rsp_ready_out; assign mem_rsp_ready_in = is_mem_rsp_nc ? core_rsp_ready_out : mem_rsp_ready_out;
end end
assign mem_rsp_data_out = mem_rsp_data_in;
assign mem_rsp_tag_out = mem_rsp_tag_in;
endmodule endmodule

View File

@@ -42,7 +42,8 @@ module VX_shared_mem #(
output wire [NUM_REQS-1:0] core_req_ready, output wire [NUM_REQS-1:0] core_req_ready,
// Core response // Core response
output wire [NUM_REQS-1:0] core_rsp_valid, output wire core_rsp_valid,
output wire [NUM_REQS-1:0] core_rsp_tmask,
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data, output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready input wire core_rsp_ready
@@ -63,7 +64,7 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual;
wire per_bank_core_req_ready_unqual; wire per_bank_core_req_ready_unqual;
VX_cache_core_req_bank_sel #( VX_core_req_bank_sel #(
.CACHE_ID (CACHE_ID), .CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (WORD_SIZE), .CACHE_LINE_SIZE (WORD_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@@ -79,13 +80,13 @@ module VX_shared_mem #(
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
.bank_stalls(perf_cache_if.bank_stalls), .bank_stalls(perf_cache_if.bank_stalls),
`endif `endif
.core_req_valid (core_req_valid), .core_req_valid (core_req_valid),
.core_req_rw (core_req_rw), .core_req_rw (core_req_rw),
.core_req_addr (core_req_addr), .core_req_addr (core_req_addr),
.core_req_byteen(core_req_byteen), .core_req_byteen (core_req_byteen),
.core_req_data (core_req_data), .core_req_data (core_req_data),
.core_req_tag (core_req_tag), .core_req_tag (core_req_tag),
.core_req_ready (core_req_ready), .core_req_ready (core_req_ready),
.per_bank_core_req_valid (per_bank_core_req_valid_unqual), .per_bank_core_req_valid (per_bank_core_req_valid_unqual),
.per_bank_core_req_tid (per_bank_core_req_tid_unqual), .per_bank_core_req_tid (per_bank_core_req_tid_unqual),
.per_bank_core_req_rw (per_bank_core_req_rw_unqual), .per_bank_core_req_rw (per_bank_core_req_rw_unqual),
@@ -233,9 +234,6 @@ module VX_shared_mem #(
end end
end end
wire [NUM_REQS-1:0] core_rsp_valids_out;
wire core_rsp_valid_out;
assign crsq_in_valid = ~creq_empty && core_req_has_read; assign crsq_in_valid = ~creq_empty && core_req_has_read;
VX_skid_buffer #( VX_skid_buffer #(
@@ -246,13 +244,11 @@ module VX_shared_mem #(
.valid_in (crsq_in_valid), .valid_in (crsq_in_valid),
.data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}), .data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
.ready_in (crsq_in_ready), .ready_in (crsq_in_ready),
.valid_out (core_rsp_valid_out), .valid_out (core_rsp_valid),
.data_out ({core_rsp_valids_out, core_rsp_data, core_rsp_tag}), .data_out ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
.ready_out (core_rsp_ready) .ready_out (core_rsp_ready)
); );
assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}};
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1; wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
@@ -342,7 +338,7 @@ module VX_shared_mem #(
assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw);
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}}); assign perf_crsp_stall_per_cycle = $countones(core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}});
end else begin end else begin
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
end end

View File

@@ -9,10 +9,11 @@ interface VX_dcache_core_rsp_if #(
parameter CORE_TAG_WIDTH = 1 parameter CORE_TAG_WIDTH = 1
) (); ) ();
wire [NUM_REQS-1:0] valid; wire valid;
wire [NUM_REQS-1:0] tmask;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [CORE_TAG_WIDTH-1:0] tag; wire [CORE_TAG_WIDTH-1:0] tag;
wire ready; wire ready;
endinterface endinterface

View File

@@ -1,6 +1,6 @@
PROJECT = Unittest PROJECT = Unittest
TOP_LEVEL_ENTITY = VX_cache_core_req_bank_sel TOP_LEVEL_ENTITY = VX_core_req_bank_sel
SRC_FILE = VX_cache_core_req_bank_sel.v SRC_FILE = VX_core_req_bank_sel.v
RTL_DIR = ../../../../rtl RTL_DIR = ../../../../rtl
FAMILY = "Arria 10" FAMILY = "Arria 10"