snooping response handling

This commit is contained in:
Blaise Tine
2020-05-11 22:55:44 -04:00
parent b6c4aa0baa
commit c49f01b769
36 changed files with 848 additions and 456 deletions

View File

@@ -5,9 +5,9 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2 #MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 #MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#DEBUG = 1 DEBUG = 1
CFLAGS += -fPIC CFLAGS += -fPIC

View File

@@ -3,6 +3,6 @@
#define DEV_MEM_SRC_ADDR 0x10000000 #define DEV_MEM_SRC_ADDR 0x10000000
#define DEV_MEM_DST_ADDR 0x20000000 #define DEV_MEM_DST_ADDR 0x20000000
#define NUM_BLOCKS 16 #define NUM_BLOCKS 1
#endif #endif

Binary file not shown.

View File

@@ -111,7 +111,7 @@
`define DDFPQ_SIZE 32 `define DDFPQ_SIZE 32
`endif `endif
// Snoop Req Queue // Snoop Req Queue Size
`ifndef DSNRQ_SIZE `ifndef DSNRQ_SIZE
`define DSNRQ_SIZE 32 `define DSNRQ_SIZE 32
`endif `endif
@@ -136,9 +136,9 @@
`define DLLVQ_SIZE 0 `define DLLVQ_SIZE 0
`endif `endif
// Fill Forward SNP Queue // Snoop Rsp Queue Size
`ifndef DFFSQ_SIZE `ifndef DSRPQ_SIZE
`define DFFSQ_SIZE 32 `define DSRPQ_SIZE 32
`endif `endif
// Prefetcher // Prefetcher
@@ -197,7 +197,7 @@
`define IDFPQ_SIZE 32 `define IDFPQ_SIZE 32
`endif `endif
// Snoop Req Queue // Snoop Req Queue Size
`ifndef ISNRQ_SIZE `ifndef ISNRQ_SIZE
`define ISNRQ_SIZE 32 `define ISNRQ_SIZE 32
`endif `endif
@@ -222,9 +222,9 @@
`define ILLVQ_SIZE 16 `define ILLVQ_SIZE 16
`endif `endif
// Fill Forward SNP Queue // Snoop Rsp Queue Size
`ifndef IFFSQ_SIZE `ifndef ISRPQ_SIZE
`define IFFSQ_SIZE 8 `define ISRPQ_SIZE 8
`endif `endif
// Prefetcher // Prefetcher
@@ -283,7 +283,7 @@
`define SDFPQ_SIZE 0 `define SDFPQ_SIZE 0
`endif `endif
// Snoop Req Queue // Snoop Req Queue Size
`ifndef SSNRQ_SIZE `ifndef SSNRQ_SIZE
`define SSNRQ_SIZE 16 `define SSNRQ_SIZE 16
`endif `endif
@@ -308,9 +308,9 @@
`define SLLVQ_SIZE 16 `define SLLVQ_SIZE 16
`endif `endif
// Fill Forward SNP Queue // Snoop Rsp Queue Size
`ifndef SFFSQ_SIZE `ifndef SSRPQ_SIZE
`define SFFSQ_SIZE 16 `define SSRPQ_SIZE 16
`endif `endif
// Prefetcher // Prefetcher
@@ -369,7 +369,7 @@
`define L2DFPQ_SIZE 32 `define L2DFPQ_SIZE 32
`endif `endif
// Snoop Req Queue // Snoop Req Queue Size
`ifndef L2SNRQ_SIZE `ifndef L2SNRQ_SIZE
`define L2SNRQ_SIZE 32 `define L2SNRQ_SIZE 32
`endif `endif
@@ -394,9 +394,9 @@
`define L2LLVQ_SIZE 32 `define L2LLVQ_SIZE 32
`endif `endif
// Fill Forward SNP Queue // Snoop Rsp Queue Size
`ifndef L2FFSQ_SIZE `ifndef L2SRPQ_SIZE
`define L2FFSQ_SIZE 32 `define L2SRPQ_SIZE 32
`endif `endif
// Prefetcher // Prefetcher
@@ -455,7 +455,7 @@
`define L3DFPQ_SIZE 32 `define L3DFPQ_SIZE 32
`endif `endif
// Snoop Req Queue // Snoop Req Queue Size
`ifndef L3SNRQ_SIZE `ifndef L3SNRQ_SIZE
`define L3SNRQ_SIZE 32 `define L3SNRQ_SIZE 32
`endif `endif
@@ -480,9 +480,9 @@
`define L3LLVQ_SIZE 0 `define L3LLVQ_SIZE 0
`endif `endif
// Fill Forward SNP Queue // Snoop Rsp Queue Size
`ifndef L3FFSQ_SIZE `ifndef L3SRPQ_SIZE
`define L3FFSQ_SIZE 8 `define L3SRPQ_SIZE 8
`endif `endif
// Prefetcher // Prefetcher

View File

@@ -68,7 +68,7 @@ module VX_csr_pipe #(
assign csr_wb_if.wb = wb_s2; assign csr_wb_if.wb = wb_s2;
genvar i; genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin for (i = 0; i < `NUM_THREADS; i++) begin
assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i : assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i :
(csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) : (csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2; csr_read_data_s2;

View File

@@ -11,11 +11,11 @@ module VX_csr_wrapper (
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init for (i = 0; i < `NUM_THREADS; i++) begin : thread_ids_init
assign thread_ids[i] = i; assign thread_ids[i] = i;
end end
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init for (i = 0; i < `NUM_THREADS; i++) begin : warp_ids_init
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num}; assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
end end
endgenerate endgenerate

View File

@@ -29,6 +29,11 @@
if (!(cond)) $error(msg); \ if (!(cond)) $error(msg); \
endgenerate endgenerate
`define UNUSED(x) \
`IGNORE_WARNINGS_BEGIN \
if (x != 0) begin end \
`IGNORE_WARNINGS_END
`define CLOG2(x) $clog2(x) `define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > x) ? 1 : 0)) `define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > x) ? 1 : 0))
`define LOG2UP(x) ((x > 1) ? $clog2(x) : 1) `define LOG2UP(x) ((x > 1) ? $clog2(x) : 1)
@@ -138,6 +143,9 @@
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
`define DNUM_REQUESTS `NUM_THREADS `define DNUM_REQUESTS `NUM_THREADS
// Snoop request tag bits
`define DSNP_TAG_WIDTH `LOG2UP(`L2SNRQ_SIZE)
////////////////////////// Icache Configurable Knobs ////////////////////////// ////////////////////////// Icache Configurable Knobs //////////////////////////
// DRAM request data bits // DRAM request data bits
@@ -177,6 +185,9 @@
// DRAM request tag bits // DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2))) `define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
// Snoop request tag bits
`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : 1)
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L2NUM_REQUESTS (2*`NUM_CORES) `define L2NUM_REQUESTS (2*`NUM_CORES)
@@ -191,6 +202,9 @@
// DRAM request tag bits // DRAM request tag bits
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH) `define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// Snoop request tag bits
`define L3SNP_TAG_WIDTH 1
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L3NUM_REQUESTS `NUM_CLUSTERS `define L3NUM_REQUESTS `NUM_CLUSTERS

View File

@@ -12,6 +12,7 @@ module VX_dmem_ctrl (
VX_cache_dram_req_if dcache_dram_req_if, VX_cache_dram_req_if dcache_dram_req_if,
VX_cache_dram_rsp_if dcache_dram_rsp_if, VX_cache_dram_rsp_if dcache_dram_rsp_if,
VX_cache_snp_req_if dcache_snp_req_if, VX_cache_snp_req_if dcache_snp_req_if,
VX_cache_snp_rsp_if dcache_snp_rsp_if,
// Core <-> Icache // Core <-> Icache
VX_cache_core_req_if icache_core_req_if, VX_cache_core_req_if icache_core_req_if,
@@ -63,7 +64,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`SDWBQ_SIZE), .DWBQ_SIZE (`SDWBQ_SIZE),
.DFQQ_SIZE (`SDFQQ_SIZE), .DFQQ_SIZE (`SDFQQ_SIZE),
.LLVQ_SIZE (`SLLVQ_SIZE), .LLVQ_SIZE (`SLLVQ_SIZE),
.FFSQ_SIZE (`SFFSQ_SIZE), .SRPQ_SIZE (`SSRPQ_SIZE),
.PRFQ_SIZE (`SPRFQ_SIZE), .PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE), .PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
@@ -110,12 +111,24 @@ module VX_dmem_ctrl (
// Snoop request // Snoop request
.snp_req_valid (0), .snp_req_valid (0),
.snp_req_addr (0), .snp_req_addr (0),
.snp_req_tag (0),
.snp_req_ready (), .snp_req_ready (),
// Snoop forwarding // Snoop response
.snp_fwd_valid (), .snp_rsp_valid (),
.snp_fwd_addr (), .snp_rsp_tag (),
.snp_fwd_ready (0) .snp_rsp_ready (0),
// Snoop forward out
.snp_fwdout_valid (),
.snp_fwdout_addr (),
.snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
); );
@@ -134,7 +147,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`DDWBQ_SIZE), .DWBQ_SIZE (`DDWBQ_SIZE),
.DFQQ_SIZE (`DDFQQ_SIZE), .DFQQ_SIZE (`DDFQQ_SIZE),
.LLVQ_SIZE (`DLLVQ_SIZE), .LLVQ_SIZE (`DLLVQ_SIZE),
.FFSQ_SIZE (`DFFSQ_SIZE), .SRPQ_SIZE (`DSRPQ_SIZE),
.PRFQ_SIZE (`DPRFQ_SIZE), .PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE), .PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
@@ -143,7 +156,8 @@ module VX_dmem_ctrl (
.WRITE_ENABLE (1), .WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH), .CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS), .CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_dcache ( ) gpu_dcache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -177,16 +191,28 @@ module VX_dmem_ctrl (
.dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag), .dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag),
.dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready), .dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready),
// Snoop Request // Snoop request
.snp_req_valid (dcache_snp_req_if.snp_req_valid), .snp_req_valid (dcache_snp_req_if.snp_req_valid),
.snp_req_addr (dcache_snp_req_if.snp_req_addr), .snp_req_addr (dcache_snp_req_if.snp_req_addr),
.snp_req_tag (dcache_snp_req_if.snp_req_tag),
.snp_req_ready (dcache_snp_req_if.snp_req_ready), .snp_req_ready (dcache_snp_req_if.snp_req_ready),
// Snoop response
.snp_rsp_valid (dcache_snp_rsp_if.snp_rsp_valid),
.snp_rsp_tag (dcache_snp_rsp_if.snp_rsp_tag),
.snp_rsp_ready (dcache_snp_rsp_if.snp_rsp_ready),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// Snoop Forward // Snoop forward out
.snp_fwd_valid (), .snp_fwdout_valid (),
.snp_fwd_addr (), .snp_fwdout_addr (),
.snp_fwd_ready (0) .snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
); );
@@ -205,7 +231,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`IDWBQ_SIZE), .DWBQ_SIZE (`IDWBQ_SIZE),
.DFQQ_SIZE (`IDFQQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE),
.LLVQ_SIZE (`ILLVQ_SIZE), .LLVQ_SIZE (`ILLVQ_SIZE),
.FFSQ_SIZE (`IFFSQ_SIZE), .SRPQ_SIZE (`ISRPQ_SIZE),
.PRFQ_SIZE (`IPRFQ_SIZE), .PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE), .PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
@@ -249,15 +275,27 @@ module VX_dmem_ctrl (
.dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready), .dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// Snoop Request // Snoop request
.snp_req_valid (0), .snp_req_valid (0),
.snp_req_addr (0), .snp_req_addr (0),
.snp_req_tag (0),
.snp_req_ready (), .snp_req_ready (),
// Snoop Forward // Snoop response
.snp_fwd_valid (), .snp_rsp_valid (),
.snp_fwd_addr (), .snp_rsp_tag (),
.snp_fwd_ready (0) .snp_rsp_ready (0),
// Snoop forward out
.snp_fwdout_valid (),
.snp_fwdout_addr (),
.snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
); );

View File

@@ -15,7 +15,7 @@ module VX_dram_arb #(
input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_req_data, input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output reg [NUM_REQUESTS-1:0] core_req_ready, output wire [NUM_REQUESTS-1:0] core_req_ready,
// Core response // Core response
output wire [NUM_REQUESTS-1:0] core_rsp_valid, output wire [NUM_REQUESTS-1:0] core_rsp_valid,
@@ -24,11 +24,11 @@ module VX_dram_arb #(
input wire [NUM_REQUESTS-1:0] core_rsp_ready, input wire [NUM_REQUESTS-1:0] core_rsp_ready,
// DRAM request // DRAM request
output reg dram_req_read, output wire dram_req_read,
output reg dram_req_write, output wire dram_req_write,
output reg [`DRAM_ADDR_WIDTH-1:0] dram_req_addr, output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output reg [`BANK_LINE_WIDTH-1:0] dram_req_data, output wire [`BANK_LINE_WIDTH-1:0] dram_req_data,
output reg [DRAM_TAG_WIDTH-1:0] dram_req_tag, output wire [DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready, input wire dram_req_ready,
// DRAM response // DRAM response
@@ -37,47 +37,34 @@ module VX_dram_arb #(
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready output wire dram_rsp_ready
); );
reg [`REQS_BITS-1:0] bus_req_idx; reg [`REQS_BITS-1:0] bus_req_sel;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
bus_req_idx <= 0; bus_req_sel <= 0;
end else begin end else begin
bus_req_idx <= bus_req_idx + 1; bus_req_sel <= bus_req_sel + 1;
end end
end end
integer i; assign dram_req_read = core_req_read [bus_req_sel];
generate assign dram_req_write = core_req_write [bus_req_sel];
always @(*) begin assign dram_req_addr = core_req_addr [bus_req_sel];
dram_req_read = 0; assign dram_req_data = core_req_data [bus_req_sel];
dram_req_write = 0; assign dram_req_tag = {core_req_tag [bus_req_sel], (`REQS_BITS)'(bus_req_sel)};
dram_req_addr = 'z;
dram_req_data = 'z;
dram_req_tag = 'z;
for (i = 0; i < NUM_REQUESTS; i++) begin for (i = 0; i < NUM_REQUESTS; i++) begin
if (bus_req_idx == (`REQS_BITS)'(i)) begin assign core_req_ready[i] = dram_req_ready && (bus_req_sel == `REQS_BITS'(i));
dram_req_read = core_req_read[i];
dram_req_write = core_req_write[i];
dram_req_addr = core_req_addr[i];
dram_req_data = core_req_data[i];
dram_req_tag = {core_req_tag[i], (`REQS_BITS)'(i)};
core_req_ready[i] = dram_req_ready;
end else begin
core_req_ready[i] = 0;
end
end
end
endgenerate
genvar j;
wire [`REQS_BITS-1:0] bus_rsp_idx = dram_rsp_tag[`REQS_BITS-1:0];
for (j = 0; j < NUM_REQUESTS; j++) begin
assign core_rsp_valid[j] = dram_rsp_valid && (bus_rsp_idx == (`REQS_BITS)'(j));
assign core_rsp_data[j] = dram_rsp_data;
assign core_rsp_tag[j] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH];
end end
assign dram_rsp_ready = core_rsp_ready[bus_rsp_idx];
wire [`REQS_BITS-1:0] bus_rsp_sel = dram_rsp_tag[`REQS_BITS-1:0];
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
assign core_rsp_valid[i] = dram_rsp_valid && (bus_rsp_sel == `REQS_BITS'(i));
assign core_rsp_data[i] = dram_rsp_data;
assign core_rsp_tag[i] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH];
end
assign dram_rsp_ready = core_rsp_ready[bus_rsp_sel];
endmodule endmodule

View File

@@ -47,7 +47,7 @@ module VX_exec_unit (
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs for (i = 0; i < `NUM_THREADS; i++) begin : alu_defs
VX_alu_unit alu_unit ( VX_alu_unit alu_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -102,7 +102,7 @@ module VX_exec_unit (
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
generate generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin for (i = 0; i < `NUM_THREADS; i++) begin
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
end end
endgenerate endgenerate

View File

@@ -39,7 +39,7 @@ module VX_gpr (
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar i; genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin for (i = 0; i < `NUM_THREADS; i++) begin
wire local_write = write_enable & writeback_if.wb_valid[i]; wire local_write = write_enable & writeback_if.wb_valid[i];
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}}; assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
end end
@@ -57,8 +57,8 @@ module VX_gpr (
`ifndef SYN `ifndef SYN
genvar j; genvar j;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin for (i = 0; i < `NUM_THREADS; i++) begin
for (j = 0; j < `NUM_GPRS; j = j + 1) begin for (j = 0; j < `NUM_GPRS; j++) begin
assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j]; assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j]; assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
end end

View File

@@ -21,7 +21,7 @@ module VX_gpr_ram (
end else begin end else begin
if (we) begin if (we) begin
integer i; integer i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin for (i = 0; i < `NUM_THREADS; i++) begin
if (be[i]) begin if (be[i]) begin
ram[waddr][i][0] <= wdata[i][7:0]; ram[waddr][i][0] <= wdata[i][7:0];
ram[waddr][i][1] <= wdata[i][15:8]; ram[waddr][i][1] <= wdata[i][15:8];

View File

@@ -17,7 +17,7 @@ module VX_gpr_wrapper (
wire[`NUM_THREADS-1:0][31:0] jal_data; wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign
assign jal_data[i] = gpr_jal_if.curr_PC; assign jal_data[i] = gpr_jal_if.curr_PC;
end end
endgenerate endgenerate
@@ -47,7 +47,7 @@ module VX_gpr_wrapper (
`endif `endif
generate generate
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs
wire valid_write_request = i == writeback_if.warp_num; wire valid_write_request = i == writeback_if.warp_num;
VX_gpr gpr( VX_gpr gpr(
.clk (clk), .clk (clk),

View File

@@ -15,7 +15,7 @@ module VX_gpu_inst (
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0]; assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end end
endgenerate endgenerate
@@ -34,7 +34,7 @@ module VX_gpu_inst (
wire[`NUM_WARPS-1:0] wspawn_new_active; wire[`NUM_WARPS-1:0] wspawn_new_active;
generate generate
for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0]; assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end end
endgenerate endgenerate
@@ -56,7 +56,7 @@ module VX_gpu_inst (
wire[`NUM_THREADS-1:0] split_new_later_mask; wire[`NUM_THREADS-1:0] split_new_later_mask;
generate generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1); wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool); assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool); assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);

View File

@@ -23,7 +23,7 @@ module VX_inst_multiplex (
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init for (i = 0; i < `NUM_THREADS; i++) begin : mask_init
assign is_mem_mask[i] = is_mem; assign is_mem_mask[i] = is_mem;
assign is_gpu_mask[i] = is_gpu; assign is_gpu_mask[i] = is_gpu;
assign is_csr_mask[i] = is_csr; assign is_csr_mask[i] = is_csr;

View File

@@ -8,7 +8,7 @@ module VX_lsu_addr_gen (
); );
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses for (i = 0; i < `NUM_THREADS; i++) begin : addresses
assign address[i] = base_address[i] + offset; assign address[i] = base_address[i] + offset;
end end
endgenerate endgenerate

View File

@@ -52,7 +52,7 @@ module VX_scheduler (
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (w = 0; w < `NUM_WARPS; w=w+1) begin for (w = 0; w < `NUM_WARPS; w=w+1) begin
for (i = 0; i < 32; i = i + 1) begin for (i = 0; i < 32; i++) begin
rename_table[w][i] <= 0; rename_table[w][i] <= 0;
end end
end end

View File

@@ -260,7 +260,7 @@ module VX_warp_sched (
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks for (i = 0; i < `NUM_WARPS; i++) begin : stacks
wire correct_warp_s = (i == split_warp_num); wire correct_warp_s = (i == split_warp_num);
wire correct_warp_j = (i == join_warp_num); wire correct_warp_j = (i == join_warp_num);

View File

@@ -8,7 +8,7 @@ module Vortex #(
input wire clk, input wire clk,
input wire reset, input wire reset,
// DRAM Dcache Req // DRAM Dcache request
output wire D_dram_req_read, output wire D_dram_req_read,
output wire D_dram_req_write, output wire D_dram_req_write,
output wire [`DDRAM_ADDR_WIDTH-1:0] D_dram_req_addr, output wire [`DDRAM_ADDR_WIDTH-1:0] D_dram_req_addr,
@@ -16,13 +16,13 @@ module Vortex #(
output wire [`DDRAM_TAG_WIDTH-1:0] D_dram_req_tag, output wire [`DDRAM_TAG_WIDTH-1:0] D_dram_req_tag,
input wire D_dram_req_ready, input wire D_dram_req_ready,
// DRAM Dcache Rsp // DRAM Dcache response
input wire D_dram_rsp_valid, input wire D_dram_rsp_valid,
input wire [`DDRAM_LINE_WIDTH-1:0] D_dram_rsp_data, input wire [`DDRAM_LINE_WIDTH-1:0] D_dram_rsp_data,
input wire [`DDRAM_TAG_WIDTH-1:0] D_dram_rsp_tag, input wire [`DDRAM_TAG_WIDTH-1:0] D_dram_rsp_tag,
output wire D_dram_rsp_ready, output wire D_dram_rsp_ready,
// DRAM Icache Req // DRAM Icache request
output wire I_dram_req_read, output wire I_dram_req_read,
output wire I_dram_req_write, output wire I_dram_req_write,
output wire [`IDRAM_ADDR_WIDTH-1:0] I_dram_req_addr, output wire [`IDRAM_ADDR_WIDTH-1:0] I_dram_req_addr,
@@ -30,17 +30,22 @@ module Vortex #(
output wire [`IDRAM_TAG_WIDTH-1:0] I_dram_req_tag, output wire [`IDRAM_TAG_WIDTH-1:0] I_dram_req_tag,
input wire I_dram_req_ready, input wire I_dram_req_ready,
// DRAM Icache Rsp // DRAM Icache response
input wire I_dram_rsp_valid, input wire I_dram_rsp_valid,
input wire [`IDRAM_LINE_WIDTH-1:0] I_dram_rsp_data, input wire [`IDRAM_LINE_WIDTH-1:0] I_dram_rsp_data,
input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag, input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag,
output wire I_dram_rsp_ready, output wire I_dram_rsp_ready,
// Cache Snooping // Snoop request
input wire snp_req_valid, input wire snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
output wire snp_rsp_valid,
output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request // I/O request
output wire io_req_read, output wire io_req_read,
output wire io_req_write, output wire io_req_write,
@@ -172,12 +177,24 @@ module Vortex #(
VX_warp_ctl_if warp_ctl_if(); VX_warp_ctl_if warp_ctl_if();
// Cache snooping // Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if(); VX_cache_snp_req_if #(
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH(`DSNP_TAG_WIDTH)
) dcache_snp_req_if();
VX_cache_snp_rsp_if #(
.SNP_TAG_WIDTH(`DSNP_TAG_WIDTH)
) dcache_snp_rsp_if();
assign dcache_snp_req_if.snp_req_valid = snp_req_valid; assign dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = snp_req_addr; assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign dcache_snp_req_if.snp_req_tag = snp_req_tag;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready; assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign snp_rsp_valid = dcache_snp_rsp_if.snp_rsp_valid;
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
VX_front_end #( VX_front_end #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) front_end ( ) front_end (
@@ -236,6 +253,7 @@ module Vortex #(
.dcache_dram_req_if (dcache_dram_req_if), .dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if), .dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if), .dcache_snp_req_if (dcache_snp_req_if),
.dcache_snp_rsp_if (dcache_snp_rsp_if),
// Core <-> Icache // Core <-> Icache
.icache_core_req_if (icache_core_req_if), .icache_core_req_if (icache_core_req_if),

View File

@@ -8,7 +8,7 @@ module Vortex_Cluster #(
input wire clk, input wire clk,
input wire reset, input wire reset,
// DRAM Req // DRAM request
output wire dram_req_read, output wire dram_req_read,
output wire dram_req_write, output wire dram_req_write,
output wire[`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr, output wire[`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr,
@@ -16,16 +16,22 @@ module Vortex_Cluster #(
output wire[`L2DRAM_TAG_WIDTH-1:0] dram_req_tag, output wire[`L2DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready, input wire dram_req_ready,
// DRAM Rsp // DRAM response
input wire dram_rsp_valid, input wire dram_rsp_valid,
input wire[`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data, input wire[`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag, input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready, output wire dram_rsp_ready,
// Cache Snooping // Snoop request
input wire snp_req_valid, input wire snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_ready, input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire[`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request // I/O request
output wire io_req_read, output wire io_req_read,
@@ -69,9 +75,14 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
wire[`NUM_CORES-1:0] per_core_snp_fwd_valid; wire[`NUM_CORES-1:0] per_core_snp_req_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_fwd_addr; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag;
wire[`NUM_CORES-1:0] per_core_snp_req_ready;
wire[`NUM_CORES-1:0] per_core_snp_rsp_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag;
wire[`NUM_CORES-1:0] per_core_snp_rsp_ready;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_req_read; wire[`NUM_CORES-1:0] per_core_io_req_read;
@@ -88,7 +99,7 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0] per_core_ebreak; wire[`NUM_CORES-1:0] per_core_ebreak;
genvar i; genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin for (i = 0; i < `NUM_CORES; i++) begin
Vortex #( Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES)) .CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core ( ) vortex_core (
@@ -118,9 +129,14 @@ module Vortex_Cluster #(
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]), .I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]), .I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.snp_req_valid (per_core_snp_fwd_valid [i]), .snp_req_valid (per_core_snp_req_valid [i]),
.snp_req_addr (per_core_snp_fwd_addr [i]), .snp_req_addr (per_core_snp_req_addr [i]),
.snp_req_ready (per_core_snp_fwd_ready [i]), .snp_req_tag (per_core_snp_req_tag [i]),
.snp_req_ready (per_core_snp_req_ready [i]),
.snp_rsp_valid (per_core_snp_rsp_valid [i]),
.snp_rsp_tag (per_core_snp_rsp_tag [i]),
.snp_rsp_ready (per_core_snp_rsp_ready [i]),
.io_req_read (per_core_io_req_read [i]), .io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]), .io_req_write (per_core_io_req_write [i]),
@@ -169,9 +185,14 @@ module Vortex_Cluster #(
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag; wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
wire l2_core_rsp_ready; wire l2_core_rsp_ready;
wire l2_snp_fwd_valid; wire[`NUM_CORES-1:0] l2_snp_fwdout_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l2_snp_fwd_addr; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr;
wire l2_snp_fwd_ready; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdout_ready;
wire[`NUM_CORES-1:0] l2_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]); assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
@@ -204,12 +225,17 @@ module Vortex_Cluster #(
assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i]; assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1]; assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
assign per_core_snp_fwd_valid [(i/2)] = l2_snp_fwd_valid && l2_snp_fwd_ready; assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)];
assign per_core_snp_fwd_addr [(i/2)] = l2_snp_fwd_addr; assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)];
assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
assign per_core_snp_rsp_ready [(i/2)] = l2_snp_fwdin_ready [(i/2)];
end end
assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready); assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
assign l2_snp_fwd_ready = (& per_core_snp_fwd_ready);
VX_cache #( VX_cache #(
.CACHE_SIZE (`L2CACHE_SIZE), .CACHE_SIZE (`L2CACHE_SIZE),
@@ -226,7 +252,7 @@ module Vortex_Cluster #(
.DWBQ_SIZE (`L2DWBQ_SIZE), .DWBQ_SIZE (`L2DWBQ_SIZE),
.DFQQ_SIZE (`L2DFQQ_SIZE), .DFQQ_SIZE (`L2DFQQ_SIZE),
.LLVQ_SIZE (`L2LLVQ_SIZE), .LLVQ_SIZE (`L2LLVQ_SIZE),
.FFSQ_SIZE (`L2FFSQ_SIZE), .SRPQ_SIZE (`L2SRPQ_SIZE),
.PRFQ_SIZE (`L2PRFQ_SIZE), .PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`L2PRFQ_STRIDE), .PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
@@ -235,7 +261,10 @@ module Vortex_Cluster #(
.SNOOP_FORWARDING (1), .SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0), .CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CORES),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_l2cache ( ) gpu_l2cache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -267,17 +296,29 @@ module Vortex_Cluster #(
.dram_rsp_valid (dram_rsp_valid), .dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag), .dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (dram_rsp_data), .dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready), .dram_rsp_ready (dram_rsp_ready),
// Snoop request // Snoop request
.snp_req_valid (snp_req_valid), .snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr), .snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready), .snp_req_ready (snp_req_ready),
// Snoop forwarding // Snoop response
.snp_fwd_valid (l2_snp_fwd_valid), .snp_rsp_valid (snp_rsp_valid),
.snp_fwd_addr (l2_snp_fwd_addr), .snp_rsp_tag (snp_rsp_tag),
.snp_fwd_ready (l2_snp_fwd_ready) .snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (l2_snp_fwdout_valid),
.snp_fwdout_addr (l2_snp_fwdout_addr),
.snp_fwdout_tag (l2_snp_fwdout_tag),
.snp_fwdout_ready (l2_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (l2_snp_fwdin_valid),
.snp_fwdin_tag (l2_snp_fwdin_tag),
.snp_fwdin_ready (l2_snp_fwdin_ready)
); );
end else begin end else begin
@@ -294,9 +335,14 @@ module Vortex_Cluster #(
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag; wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready; wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready;
wire arb_snp_fwd_valid; wire[`NUM_CORES-1:0] arb_snp_fwdout_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] arb_snp_fwd_addr; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr;
wire arb_snp_fwd_ready; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdout_ready;
wire[`NUM_CORES-1:0] arb_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)]; assign arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
@@ -329,15 +375,47 @@ module Vortex_Cluster #(
assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign per_core_snp_fwd_valid [(i/2)] = arb_snp_fwd_valid && arb_snp_fwd_ready; assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)];
assign per_core_snp_fwd_addr [(i/2)] = arb_snp_fwd_addr; assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)];
end assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)];
assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign arb_snp_fwd_valid = snp_req_valid;
assign arb_snp_fwd_addr = snp_req_addr;
assign arb_snp_fwd_ready = (& per_core_snp_fwd_ready);
assign snp_req_ready = arb_snp_fwd_ready; assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)];
end
VX_snp_forwarder #(
.BANK_LINE_SIZE(`L2BANK_LINE_SIZE),
.NUM_REQUESTS(`NUM_CORES),
.SNRQ_SIZE(`L2SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH(`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH(`DSNP_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
`IGNORE_WARNINGS_BEGIN
.snp_rsp_addr (),
`IGNORE_WARNINGS_END
.snp_rsp_ready (snp_rsp_ready),
.snp_fwdout_valid (arb_snp_fwdout_valid),
.snp_fwdout_addr (arb_snp_fwdout_addr),
.snp_fwdout_tag (arb_snp_fwdout_tag),
.snp_fwdout_ready (arb_snp_fwdout_ready),
.snp_fwdin_valid (arb_snp_fwdin_valid),
.snp_fwdin_tag (arb_snp_fwdin_tag),
.snp_fwdin_ready (arb_snp_fwdin_ready)
);
VX_dram_arb #( VX_dram_arb #(
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE), .BANK_LINE_SIZE (`L2BANK_LINE_SIZE),

View File

@@ -15,16 +15,22 @@ module Vortex_Socket (
input wire dram_req_ready, input wire dram_req_ready,
// DRAM response // DRAM response
input wire dram_rsp_valid, input wire dram_rsp_valid,
input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data, input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag, input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready, output wire dram_rsp_ready,
// Cache snooping // Snoop request
input wire snp_req_valid, input wire snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire[`L3SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire[`L3SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request // I/O request
output wire io_req_read, output wire io_req_read,
output wire io_req_write, output wire io_req_write,
@@ -66,8 +72,13 @@ module Vortex_Socket (
.snp_req_valid (snp_req_valid), .snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr), .snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready), .snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.io_req_read (io_req_read), .io_req_read (io_req_read),
.io_req_write (io_req_write), .io_req_write (io_req_write),
.io_req_addr (io_req_addr), .io_req_addr (io_req_addr),
@@ -99,9 +110,14 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_valid; wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_ADDR_WIDTH-1:0] per_cluster_snp_fwd_addr; wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready; wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read; wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read;
@@ -118,7 +134,7 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak; wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
genvar i; genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin for (i = 0; i < `NUM_CLUSTERS; i++) begin
Vortex_Cluster #( Vortex_Cluster #(
.CLUSTER_ID(i) .CLUSTER_ID(i)
) Vortex_Cluster ( ) Vortex_Cluster (
@@ -137,9 +153,14 @@ module Vortex_Socket (
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]), .dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]), .dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.snp_req_valid (per_cluster_snp_fwd_valid [i]), .snp_req_valid (per_cluster_snp_req_valid [i]),
.snp_req_addr (per_cluster_snp_fwd_addr [i]), .snp_req_addr (per_cluster_snp_req_addr [i]),
.snp_req_ready (per_cluster_snp_fwd_ready [i]), .snp_req_tag (per_cluster_snp_req_tag [i]),
.snp_req_ready (per_cluster_snp_req_ready [i]),
.snp_rsp_valid (per_cluster_snp_rsp_valid [i]),
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.io_req_read (per_cluster_io_req_read [i]), .io_req_read (per_cluster_io_req_read [i]),
.io_req_write (per_cluster_io_req_write [i]), .io_req_write (per_cluster_io_req_write [i]),
@@ -185,11 +206,16 @@ module Vortex_Socket (
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire l3_core_rsp_ready; wire l3_core_rsp_ready;
wire l3_snp_fwd_valid; wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l3_snp_fwd_addr; wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire l3_snp_fwd_ready; wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request // Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]); assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO; assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
@@ -203,13 +229,19 @@ module Vortex_Socket (
assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i]; assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i]; assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i];
// Snoop Forwarding // Snoop Forwarding out
assign per_cluster_snp_fwd_valid [i] = l3_snp_fwd_valid && l3_snp_fwd_ready; assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i];
assign per_cluster_snp_fwd_addr [i] = l3_snp_fwd_addr; assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i];
assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i];
assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
// Snoop Forwarding in
assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
assign l3_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
assign per_cluster_snp_rsp_ready [i] = l3_snp_fwdin_ready [i];
end end
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
assign l3_snp_fwd_ready = (& per_cluster_snp_fwd_ready);
VX_cache #( VX_cache #(
.CACHE_SIZE (`L3CACHE_SIZE), .CACHE_SIZE (`L3CACHE_SIZE),
@@ -226,7 +258,7 @@ module Vortex_Socket (
.DWBQ_SIZE (`L3DWBQ_SIZE), .DWBQ_SIZE (`L3DWBQ_SIZE),
.DFQQ_SIZE (`L3DFQQ_SIZE), .DFQQ_SIZE (`L3DFQQ_SIZE),
.LLVQ_SIZE (`L3LLVQ_SIZE), .LLVQ_SIZE (`L3LLVQ_SIZE),
.FFSQ_SIZE (`L3FFSQ_SIZE), .SRPQ_SIZE (`L3SRPQ_SIZE),
.PRFQ_SIZE (`L3PRFQ_SIZE), .PRFQ_SIZE (`L3PRFQ_SIZE),
.PRFQ_STRIDE (`L3PRFQ_STRIDE), .PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
@@ -235,7 +267,10 @@ module Vortex_Socket (
.SNOOP_FORWARDING (1), .SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0), .CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH) .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CLUSTERS),
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) gpu_l3cache ( ) gpu_l3cache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -272,12 +307,24 @@ module Vortex_Socket (
// Snoop request // Snoop request
.snp_req_valid (snp_req_valid), .snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr), .snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready), .snp_req_ready (snp_req_ready),
// Snoop forwarding // Snoop response
.snp_fwd_valid (l3_snp_fwd_valid), .snp_rsp_valid (snp_rsp_valid),
.snp_fwd_addr (l3_snp_fwd_addr), .snp_rsp_tag (snp_rsp_tag),
.snp_fwd_ready (l3_snp_fwd_ready) .snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (l3_snp_fwdout_valid),
.snp_fwdout_addr (l3_snp_fwdout_addr),
.snp_fwdout_tag (l3_snp_fwdout_tag),
.snp_fwdout_ready (l3_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (l3_snp_fwdin_valid),
.snp_fwdin_tag (l3_snp_fwdin_tag),
.snp_fwdin_ready (l3_snp_fwdin_ready)
); );
end end

203
hw/rtl/cache/VX_bank.v vendored
View File

@@ -21,7 +21,7 @@ module VX_bank #(
parameter MRVQ_SIZE = 0, parameter MRVQ_SIZE = 0,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 0, parameter DFPQ_SIZE = 0,
// Snoop Req Queue // Snoop Req Queue Size
parameter SNRQ_SIZE = 0, parameter SNRQ_SIZE = 0,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
@@ -33,8 +33,8 @@ module VX_bank #(
parameter DFQQ_SIZE = 0, parameter DFQQ_SIZE = 0,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 0, parameter LLVQ_SIZE = 0,
// Fill Forward SNP Queue // Snoop Rsp Queue Size
parameter FFSQ_SIZE = 0, parameter SRPQ_SIZE = 0,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 0, parameter FILL_INVALIDAOR_SIZE = 0,
@@ -52,33 +52,34 @@ module VX_bank #(
parameter CORE_TAG_WIDTH = 0, parameter CORE_TAG_WIDTH = 0,
// size of tag id in core request tag // size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0 parameter CORE_TAG_ID_BITS = 0,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Core Request // Core Request
input wire core_req_ready,
input wire [NUM_REQUESTS-1:0] core_req_valids, input wire [NUM_REQUESTS-1:0] core_req_valids,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write, input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_full, output wire core_req_ready,
// Core Response // Core Response
output wire core_rsp_valid, output wire core_rsp_valid,
output wire [`REQS_BITS-1:0] core_rsp_tid, output wire [`REQS_BITS-1:0] core_rsp_tid,
output wire [`WORD_WIDTH-1:0] core_rsp_data, output wire [`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag, output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_pop, input wire core_rsp_ready,
// Dram Fill Requests // Dram Fill Requests
output wire dram_fill_req_valid, output wire dram_fill_req_valid,
output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr, output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr,
output wire dram_fill_req_is_snp, input wire dram_fill_req_ready,
input wire dram_fill_req_full,
// Dram Fill Response // Dram Fill Response
input wire dram_fill_rsp_valid, input wire dram_fill_rsp_valid,
@@ -90,57 +91,47 @@ module VX_bank #(
output wire dram_wb_req_valid, output wire dram_wb_req_valid,
output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr, output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr,
output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data, output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data,
input wire dram_wb_req_pop, input wire dram_wb_req_ready,
// Snp Request // Snp Request
input wire snp_req_valid, input wire snp_req_valid,
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_full, input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
output wire snp_fwd_valid, output wire snp_rsp_valid,
output wire [`LINE_ADDR_WIDTH-1:0] snp_fwd_addr, output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_fwd_pop input wire snp_rsp_ready
); );
reg snoop_state = 0; wire snrq_pop;
wire snrq_empty;
always @(posedge clk) begin wire snrq_full;
if (reset) begin
snoop_state <= 0;
end else begin
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING;
end
end
wire snrq_pop;
wire snrq_empty;
wire snrq_valid_st0;
wire[`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
assign snrq_valid_st0 = !snrq_empty;
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
VX_generic_queue #( VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH), .DATAW(`LINE_ADDR_WIDTH + SNP_REQ_TAG_WIDTH),
.SIZE(SNRQ_SIZE) .SIZE(SNRQ_SIZE)
) snr_queue ( ) snp_req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (snp_req_valid), .push (snp_req_valid),
.data_in (snp_req_addr), .data_in ({snp_req_addr, snp_req_tag}),
.pop (snrq_pop), .pop (snrq_pop),
.data_out(snrq_addr_st0), .data_out({snrq_addr_st0, snrq_tag_st0}),
.empty (snrq_empty), .empty (snrq_empty),
.full (snp_req_full) .full (snrq_full)
); );
assign snp_req_ready = ~snrq_full;
wire dfpq_pop; wire dfpq_pop;
wire dfpq_empty; wire dfpq_empty;
wire dfpq_full; wire dfpq_full;
wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0;
wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0;
assign dram_fill_rsp_ready = !dfpq_full;
VX_generic_queue #( VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)), .DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)),
@@ -156,9 +147,12 @@ module VX_bank #(
.full (dfpq_full) .full (dfpq_full)
); );
assign dram_fill_rsp_ready = !dfpq_full;
wire reqq_pop; wire reqq_pop;
wire reqq_push; wire reqq_push;
wire reqq_empty; wire reqq_empty;
wire reqq_full;
wire reqq_req_st0; wire reqq_req_st0;
wire[`REQS_BITS-1:0] reqq_req_tid_st0; wire[`REQS_BITS-1:0] reqq_req_tid_st0;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
@@ -169,14 +163,12 @@ module VX_bank #(
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0; wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (| core_req_valids);
VX_cache_req_queue #( VX_cache_req_queue #(
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) .CORE_TAG_ID_BITS(CORE_TAG_ID_BITS)
) req_queue ( ) req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -199,8 +191,11 @@ module VX_bank #(
.reqq_req_mem_read_st0 (reqq_req_mem_read_st0), .reqq_req_mem_read_st0 (reqq_req_mem_read_st0),
.reqq_req_mem_write_st0(reqq_req_mem_write_st0), .reqq_req_mem_write_st0(reqq_req_mem_write_st0),
.reqq_empty (reqq_empty), .reqq_empty (reqq_empty),
.reqq_full (core_req_full) .reqq_full (reqq_full)
); );
assign core_req_ready = ~reqq_full;
assign reqq_push = (| core_req_valids) && core_req_ready;
wire mrvq_pop; wire mrvq_pop;
wire mrvq_full; wire mrvq_full;
@@ -237,7 +232,7 @@ module VX_bank #(
integer j; integer j;
always @(*) begin always @(*) begin
is_fill_in_pipe = 0; is_fill_in_pipe = 0;
for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin for (j = 0; j < STAGE_1_CYCLES; j++) begin
if (is_fill_st1[j]) begin if (is_fill_st1[j]) begin
is_fill_in_pipe = 1; is_fill_in_pipe = 1;
end end
@@ -251,7 +246,7 @@ module VX_bank #(
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe; assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe;
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe; assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe;
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe; assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe;
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe; assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && !snrq_empty && !stall_bank_pipe;
wire qual_is_fill_st0; wire qual_is_fill_st0;
wire qual_valid_st0; wire qual_valid_st0;
@@ -262,7 +257,7 @@ module VX_bank #(
wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0; wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0;
wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0; wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0; wire qual_going_to_write_st0;
wire qual_is_snp; wire qual_is_snp_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0]; wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
@@ -270,6 +265,7 @@ module VX_bank #(
wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0]; wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0];
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st1 [STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0];
assign qual_is_fill_st0 = dfpq_pop; assign qual_is_fill_st0 = dfpq_pop;
@@ -298,34 +294,34 @@ module VX_bank #(
(snrq_pop) ? 1 : (snrq_pop) ? 1 :
0; 0;
assign qual_is_snp = snrq_pop ? 1 : 0; assign qual_is_snp_st0 = snrq_pop ? 1 : 0;
assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 : assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 :
reqq_pop ? reqq_req_writeword_st0 : reqq_pop ? reqq_req_writeword_st0 :
0; 0;
VX_generic_register #( VX_generic_register #(
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH)
) s0_1_c0 ( ) s0_1_c0 (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_bank_pipe), .stall (stall_bank_pipe),
.flush (0), .flush (0),
.in ({qual_is_snp, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), .in ({qual_is_snp_st0, snrq_tag_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) .out ({is_snp_st1[0], snrq_tag_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
); );
genvar i; genvar i;
for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin for (i = 1; i < STAGE_1_CYCLES; i++) begin
VX_generic_register #( VX_generic_register #(
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) .N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH)
) s0_1_cc ( ) s0_1_cc (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_bank_pipe), .stall(stall_bank_pipe),
.flush(0), .flush(0),
.in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), .in ({is_snp_st1[i-1], snrq_tag_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) .out ({is_snp_st1[i], snrq_tag_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
); );
end end
@@ -355,10 +351,10 @@ module VX_bank #(
.DRAM_ENABLE (DRAM_ENABLE), .DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE) .WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access ( ) tag_data_access (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_bank_pipe), .stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe), .stall_bank_pipe (stall_bank_pipe),
// Initial Read // Initial Read
.readaddr_st10 (addr_st1[0][`LINE_SELECT_BITS-1:0]), .readaddr_st10 (addr_st1[0][`LINE_SELECT_BITS-1:0]),
@@ -397,17 +393,18 @@ module VX_bank #(
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2;
wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2;
wire fill_saw_dirty_st2; wire fill_saw_dirty_st2;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st2;
wire is_snp_st2; wire is_snp_st2;
VX_generic_register #( VX_generic_register #(
.N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH) .N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH + SNP_REQ_TAG_WIDTH)
) st_1e_2 ( ) st_1e_2 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_bank_pipe), .stall(stall_bank_pipe),
.flush(0), .flush(0),
.in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), .in ({is_snp_st1e, snrq_tag_st1[STAGE_1_CYCLES-1], fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) .out ({is_snp_st2 , snrq_tag_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
); );
wire should_flush; wire should_flush;
@@ -415,7 +412,7 @@ module VX_bank #(
wire cwbq_full; wire cwbq_full;
wire dwbq_full; wire dwbq_full;
wire ffsq_full; wire srpq_full;
wire invalidate_fill; wire invalidate_fill;
// Enqueue to miss reserv if it's a valid miss // Enqueue to miss reserv if it's a valid miss
@@ -424,11 +421,11 @@ module VX_bank #(
&& miss_st2 && miss_st2
&& !mrvq_full && !mrvq_full
&& !(should_flush && dwbq_push) && !(should_flush && dwbq_push)
&& !((is_snp_st2 && valid_st2 && ffsq_full) && !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full) || ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
assign miss_add_addr = addr_st2; assign miss_add_addr = addr_st2;
assign miss_add_wsel = wsel_st2; assign miss_add_wsel = wsel_st2;
@@ -474,21 +471,23 @@ module VX_bank #(
); );
// Enqueue to CWB Queue // Enqueue to CWB Queue
// TODO: should investigate the need for "SNOOP_FORWARDING" here
wire cwbq_push = (valid_st2 && !miss_st2) wire cwbq_push = (valid_st2 && !miss_st2)
&& !cwbq_full && !cwbq_full
&& (miss_add_mem_write == `BYTE_EN_NO) && (miss_add_mem_write == `BYTE_EN_NO)
&& !((is_snp_st2 && valid_st2 && ffsq_full) && !((is_snp_st2 && valid_st2 && srpq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid; wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag; wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
wire cwbq_empty; wire cwbq_empty;
wire cwbq_pop;
assign core_rsp_valid = !cwbq_empty; assign core_rsp_valid = !cwbq_empty;
assign cwbq_pop = core_rsp_valid && core_rsp_ready;
VX_generic_queue #( VX_generic_queue #(
.DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
@@ -500,29 +499,28 @@ module VX_bank #(
.push (cwbq_push), .push (cwbq_push),
.data_in ({cwbq_tid, cwbq_tag, cwbq_data}), .data_in ({cwbq_tid, cwbq_tag, cwbq_data}),
.pop (core_rsp_pop), .pop (cwbq_pop),
.data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}),
.empty (cwbq_empty), .empty (cwbq_empty),
.full (cwbq_full) .full (cwbq_full)
); );
assign should_flush = snoop_state assign should_flush = valid_st2
&& valid_st2
&& (miss_add_mem_write != `BYTE_EN_NO) && (miss_add_mem_write != `BYTE_EN_NO)
&& !is_snp_st2 && !is_fill_st2; && !is_snp_st2
&& !is_fill_st2;
// Enqueue to DWB Queue // Enqueue to DWB Queue
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush)
&& !dwbq_full && !dwbq_full
&& !((is_snp_st2 && valid_st2 && ffsq_full) && !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full) || ((valid_st2 && !miss_st2) && cwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr; wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
wire dwbq_empty;
wire[`BANK_LINE_WIDTH-1:0] dwbq_req_data; wire[`BANK_LINE_WIDTH-1:0] dwbq_req_data;
wire dwbq_empty;
if (SNOOP_FORWARDING) begin if (SNOOP_FORWARDING) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2; assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
@@ -532,7 +530,7 @@ module VX_bank #(
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end end
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_full && !is_snp_st2; wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2; wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
VX_fill_invalidator #( VX_fill_invalidator #(
@@ -549,9 +547,8 @@ module VX_bank #(
); );
// Enqueue in dram_fill_req // Enqueue in dram_fill_req
assign dram_fill_req_valid = possible_fill && !invalidate_fill; assign dram_fill_req_valid = possible_fill && !invalidate_fill;
assign dram_fill_req_is_snp = is_snp_st2 && valid_st2 && miss_st2; assign dram_fill_req_addr = addr_st2;
assign dram_fill_req_addr = addr_st2;
assign dram_wb_req_valid = !dwbq_empty; assign dram_wb_req_valid = !dwbq_empty;
@@ -565,43 +562,43 @@ module VX_bank #(
.push (dwbq_push), .push (dwbq_push),
.data_in ({dwbq_req_addr, dwbq_req_data}), .data_in ({dwbq_req_addr, dwbq_req_data}),
.pop (dram_wb_req_pop), .pop (dram_wb_req_ready),
.data_out({dram_wb_req_addr, dram_wb_req_data}), .data_out({dram_wb_req_addr, dram_wb_req_data}),
.empty (dwbq_empty), .empty (dwbq_empty),
.full (dwbq_full) .full (dwbq_full)
); );
wire snp_fwd_push; wire snp_rsp_push;
wire ffsq_empty; wire srpq_empty;
assign snp_fwd_push = is_snp_st2 assign snp_rsp_push = is_snp_st2
&& valid_st2 && valid_st2
&& !ffsq_full && !srpq_full
&& !(((valid_st2 && !miss_st2) && cwbq_full) && !(((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full)); || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
assign snp_fwd_valid = !ffsq_empty; assign snp_rsp_valid = !srpq_empty;
VX_generic_queue #( VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH), .DATAW(SNP_REQ_TAG_WIDTH),
.SIZE(FFSQ_SIZE) .SIZE(SRPQ_SIZE)
) ffs_queue ( ) snp_rsp_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (snp_fwd_push), .push (snp_rsp_push),
.data_in (addr_st2), .data_in (snrq_tag_st2),
.pop (snp_fwd_pop), .pop (snp_rsp_ready),
.data_out(snp_fwd_addr), .data_out(snp_rsp_tag),
.empty (ffsq_empty), .empty (srpq_empty),
.full (ffsq_full) .full (srpq_full)
); );
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full) assign stall_bank_pipe = (is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full) || ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full); || (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready);
endmodule : VX_bank endmodule : VX_bank

View File

@@ -22,7 +22,7 @@ module VX_cache #(
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue Size
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
@@ -34,8 +34,8 @@ module VX_cache #(
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Forward SNP Queue // Snoop Rsp Queue Size
parameter FFSQ_SIZE = 8, parameter SRPQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
@@ -60,7 +60,16 @@ module VX_cache #(
parameter CORE_TAG_ID_BITS = 0, parameter CORE_TAG_ID_BITS = 0,
// dram request tag size // dram request tag size
parameter DRAM_TAG_WIDTH = 1 parameter DRAM_TAG_WIDTH = 1,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = 2,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1,
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -94,56 +103,117 @@ module VX_cache #(
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready, output wire dram_rsp_ready,
// Snoop Req // Snoop request
input wire snp_req_valid, input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready, output wire snp_req_ready,
// Snoop Forward // Snoop response
output wire snp_fwd_valid, output wire snp_rsp_valid,
output wire [`DRAM_ADDR_WIDTH-1:0] snp_fwd_addr, output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_fwd_ready input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid,
input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
`IGNORE_WARNINGS_END
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready
); );
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids; wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_pop;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire dfqq_full;
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid; wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr; wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr;
wire dram_fill_req_ready;
wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready; wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop; wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready;
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid; wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr; wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr;
wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data; wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data;
wire [NUM_BANKS-1:0] per_bank_reqq_full; wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
wire [NUM_BANKS-1:0] per_bank_snp_req_full;
wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid; wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_snp_fwd_addr; wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_snp_fwd_pop; wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
`DEBUG_BEGIN wire snp_req_valid_qual;
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_is_snp; wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
`DEBUG_END wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
wire snp_req_ready_qual;
assign dram_req_tag = dram_req_addr; if (SNOOP_FORWARDING) begin
assign core_req_ready = ~(| per_bank_reqq_full); VX_snp_forwarder #(
assign snp_req_ready = ~(| per_bank_snp_req_full); .BANK_LINE_SIZE (BANK_LINE_SIZE),
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready); .NUM_REQUESTS (NUM_SNP_REQUESTS),
.SNRQ_SIZE (SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (SNP_FWD_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_req_valid_qual),
.snp_rsp_addr (snp_req_addr_qual),
.snp_rsp_tag (snp_req_tag_qual),
.snp_rsp_ready (snp_req_ready_qual),
.snp_fwdout_valid (snp_fwdout_valid),
.snp_fwdout_addr (snp_fwdout_addr),
.snp_fwdout_tag (snp_fwdout_tag),
.snp_fwdout_ready (snp_fwdout_ready),
.snp_fwdin_valid (snp_fwdin_valid),
.snp_fwdin_tag (snp_fwdin_tag),
.snp_fwdin_ready (snp_fwdin_ready)
);
end else begin
assign snp_fwdout_valid = 0;
assign snp_fwdout_addr = 0;
assign snp_fwdout_tag = 0;
assign snp_fwdin_ready = 0;
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
end
assign dram_req_tag = dram_req_addr;
assign core_req_ready = (& per_bank_core_req_ready);
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
assign snp_req_ready_qual = (& per_bank_snp_req_ready);
VX_cache_core_req_bank_sel #( VX_cache_core_req_bank_sel #(
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS) .NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sell ( ) cache_core_req_bank_sell (
.core_req_valid (core_req_valid), .core_req_valid (core_req_valid),
.core_req_addr (core_req_addr), .core_req_addr (core_req_addr),
@@ -152,7 +222,7 @@ module VX_cache #(
genvar i; genvar i;
generate generate
for (i = 0; i < NUM_BANKS; i = i + 1) begin for (i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids; wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids;
wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr; wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
@@ -160,58 +230,57 @@ module VX_cache #(
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write; wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write;
wire curr_bank_core_rsp_pop;
wire curr_bank_core_rsp_valid; wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_dram_fill_rsp_valid; wire curr_bank_dram_fill_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data; wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
wire curr_bank_dram_fill_rsp_ready; wire curr_bank_dram_fill_rsp_ready;
wire curr_bank_dram_fill_req_full;
wire curr_bank_dram_fill_req_valid; wire curr_bank_dram_fill_req_valid;
wire curr_bank_dram_fill_req_is_snp;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
wire curr_bank_dram_fill_req_ready;
wire curr_bank_dram_wb_req_pop;
wire curr_bank_dram_wb_req_valid; wire curr_bank_dram_wb_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data; wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_dram_wb_req_ready;
wire curr_bank_snp_req_valid; wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_full; wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_fwd_valid; wire curr_bank_snp_rsp_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_fwd_addr; wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_fwd_pop; wire curr_bank_snp_rsp_ready;
wire curr_bank_reqq_full; wire curr_bank_core_req_ready;
// Core Req // Core Req
assign curr_bank_core_req_valids = per_bank_valids[i]; assign curr_bank_core_req_valids = per_bank_valids[i] & {NUM_REQUESTS{core_req_ready}};
assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_data = core_req_data; assign curr_bank_core_req_data = core_req_data;
assign curr_bank_core_req_tag = core_req_tag; assign curr_bank_core_req_tag = core_req_tag;
assign curr_bank_core_req_read = core_req_read; assign curr_bank_core_req_read = core_req_read;
assign curr_bank_core_req_write = core_req_write; assign curr_bank_core_req_write = core_req_write;
assign per_bank_reqq_full[i] = curr_bank_reqq_full; assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core WB // Core WB
assign curr_bank_core_rsp_pop = per_bank_core_rsp_pop[i]; assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid; assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid; assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag; assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data; assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// Dram fill request // Dram fill request
assign curr_bank_dram_fill_req_full = dfqq_full;
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid; assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i); assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
assign per_bank_dram_fill_req_is_snp[i] = curr_bank_dram_fill_req_is_snp; assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
// Dram fill response // Dram fill response
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i); assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
@@ -219,44 +288,46 @@ module VX_cache #(
assign curr_bank_dram_fill_rsp_data = dram_rsp_data; assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready; assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
// Dram writeback request // Dram writeback request
assign curr_bank_dram_wb_req_pop = per_bank_dram_wb_queue_pop[i];
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid; assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i); assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data; assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
// Snoop Request // Snoop request
assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i); assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr); assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
assign per_bank_snp_req_full[i] = curr_bank_snp_req_full; assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop Fwd // Snoop response
assign per_bank_snp_fwd_valid[i] = curr_bank_snp_fwd_valid; assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
assign per_bank_snp_fwd_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_snp_fwd_addr, i); assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
assign curr_bank_snp_fwd_pop = per_bank_snp_fwd_pop[i]; assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
VX_bank #( VX_bank #(
.CACHE_SIZE (CACHE_SIZE), .CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS), .NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES), .STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE), .REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE), .SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE), .CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE), .DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE), .DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FFSQ_SIZE (FFSQ_SIZE), .SRPQ_SIZE (SRPQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE), .DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE), .WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING), .SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank ( ) bank (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@@ -267,21 +338,19 @@ module VX_cache #(
.core_req_addr (curr_bank_core_req_addr), .core_req_addr (curr_bank_core_req_addr),
.core_req_data (curr_bank_core_req_data), .core_req_data (curr_bank_core_req_data),
.core_req_tag (curr_bank_core_req_tag), .core_req_tag (curr_bank_core_req_tag),
.core_req_full (curr_bank_reqq_full), .core_req_ready (curr_bank_core_req_ready),
.core_req_ready (core_req_ready),
// Core response // Core response
.core_rsp_valid (curr_bank_core_rsp_valid), .core_rsp_valid (curr_bank_core_rsp_valid),
.core_rsp_tid (curr_bank_core_rsp_tid), .core_rsp_tid (curr_bank_core_rsp_tid),
.core_rsp_data (curr_bank_core_rsp_data), .core_rsp_data (curr_bank_core_rsp_data),
.core_rsp_tag (curr_bank_core_rsp_tag), .core_rsp_tag (curr_bank_core_rsp_tag),
.core_rsp_pop (curr_bank_core_rsp_pop), .core_rsp_ready (curr_bank_core_rsp_ready),
// Dram fill request // Dram fill request
.dram_fill_req_valid (curr_bank_dram_fill_req_valid), .dram_fill_req_valid (curr_bank_dram_fill_req_valid),
.dram_fill_req_addr (curr_bank_dram_fill_req_addr), .dram_fill_req_addr (curr_bank_dram_fill_req_addr),
.dram_fill_req_is_snp (curr_bank_dram_fill_req_is_snp), .dram_fill_req_ready (curr_bank_dram_fill_req_ready),
.dram_fill_req_full (curr_bank_dram_fill_req_full),
// Dram fill response // Dram fill response
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid), .dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
@@ -293,20 +362,45 @@ module VX_cache #(
.dram_wb_req_valid (curr_bank_dram_wb_req_valid), .dram_wb_req_valid (curr_bank_dram_wb_req_valid),
.dram_wb_req_addr (curr_bank_dram_wb_req_addr), .dram_wb_req_addr (curr_bank_dram_wb_req_addr),
.dram_wb_req_data (curr_bank_dram_wb_req_data), .dram_wb_req_data (curr_bank_dram_wb_req_data),
.dram_wb_req_pop (curr_bank_dram_wb_req_pop), .dram_wb_req_ready (curr_bank_dram_wb_req_ready),
// Snoop request // Snoop request
.snp_req_valid (curr_bank_snp_req_valid), .snp_req_valid (curr_bank_snp_req_valid),
.snp_req_addr (curr_bank_snp_req_addr), .snp_req_addr (curr_bank_snp_req_addr),
.snp_req_full (curr_bank_snp_req_full), .snp_req_tag (curr_bank_snp_req_tag),
.snp_req_ready (curr_bank_snp_req_ready),
// Snoop forwarding // Snoop response
.snp_fwd_valid (curr_bank_snp_fwd_valid), .snp_rsp_valid (curr_bank_snp_rsp_valid),
.snp_fwd_addr (curr_bank_snp_fwd_addr), .snp_rsp_tag (curr_bank_snp_rsp_tag),
.snp_fwd_pop (curr_bank_snp_fwd_pop) .snp_rsp_ready (curr_bank_snp_rsp_ready)
); );
end end
endgenerate endgenerate
VX_cache_dram_req_arb #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE)
) cache_dram_req_arb (
.clk (clk),
.reset (reset),
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
.dram_fill_req_ready (dram_fill_req_ready),
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
.per_bank_dram_wb_req_ready (per_bank_dram_wb_req_ready),
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_ready (dram_req_ready)
);
VX_cache_core_rsp_merge #( VX_cache_core_rsp_merge #(
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@@ -319,48 +413,24 @@ module VX_cache #(
.per_bank_core_rsp_valid (per_bank_core_rsp_valid), .per_bank_core_rsp_valid (per_bank_core_rsp_valid),
.per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_data (per_bank_core_rsp_data),
.per_bank_core_rsp_tag (per_bank_core_rsp_tag), .per_bank_core_rsp_tag (per_bank_core_rsp_tag),
.per_bank_core_rsp_pop (per_bank_core_rsp_pop), .per_bank_core_rsp_ready (per_bank_core_rsp_ready),
.core_rsp_valid (core_rsp_valid), .core_rsp_valid (core_rsp_valid),
.core_rsp_data (core_rsp_data), .core_rsp_data (core_rsp_data),
.core_rsp_tag (core_rsp_tag), .core_rsp_tag (core_rsp_tag),
.core_rsp_ready (core_rsp_ready) .core_rsp_ready (core_rsp_ready)
); );
VX_cache_dram_req_arb #( VX_snp_rsp_arb #(
.BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS),
.NUM_BANKS (NUM_BANKS), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.WORD_SIZE (WORD_SIZE), .SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
.DFQQ_SIZE (DFQQ_SIZE), ) snp_rsp_arb (
.PRFQ_SIZE (PRFQ_SIZE), .per_bank_snp_rsp_valid (per_bank_snp_rsp_valid),
.PRFQ_STRIDE (PRFQ_STRIDE) .per_bank_snp_rsp_tag (per_bank_snp_rsp_tag),
) cache_dram_req_arb ( .per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
.clk (clk), .snp_rsp_valid (snp_rsp_valid),
.reset (reset), .snp_rsp_tag (snp_rsp_tag),
.dfqq_full (dfqq_full), .snp_rsp_ready (snp_rsp_ready)
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
.per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_ready (dram_req_ready)
);
VX_snp_fwd_arb #(
.NUM_BANKS(NUM_BANKS),
.BANK_LINE_SIZE(BANK_LINE_SIZE)
) snp_fwd_arb (
.per_bank_snp_fwd_valid (per_bank_snp_fwd_valid),
.per_bank_snp_fwd_addr (per_bank_snp_fwd_addr),
.per_bank_snp_fwd_pop (per_bank_snp_fwd_pop),
.snp_fwd_valid (snp_fwd_valid),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (snp_fwd_ready)
); );
endmodule endmodule

View File

@@ -21,7 +21,7 @@ module VX_cache_core_req_bank_sel #(
integer i; integer i;
always @(*) begin always @(*) begin
per_bank_valids = 0; per_bank_valids = 0;
for (i = 0; i < NUM_REQUESTS; i = i + 1) begin for (i = 0; i < NUM_REQUESTS; i++) begin
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
// If there is only one bank, then only map requests to that bank // If there is only one bank, then only map requests to that bank
per_bank_valids[0][i] = core_req_valid[i]; per_bank_valids[0][i] = core_req_valid[i];

View File

@@ -17,7 +17,7 @@ module VX_cache_core_rsp_merge #(
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop, output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Writeback // Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid, output reg [NUM_REQUESTS-1:0] core_rsp_valid,
@@ -28,7 +28,7 @@ module VX_cache_core_rsp_merge #(
reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual; reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual;
assign per_bank_core_rsp_pop = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}}; assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
wire [`BANK_BITS-1:0] main_bank_index; wire [`BANK_BITS-1:0] main_bank_index;
wire found_bank; wire found_bank;
@@ -48,7 +48,7 @@ module VX_cache_core_rsp_merge #(
always @(*) begin always @(*) begin
core_rsp_valid = 0; core_rsp_valid = 0;
core_rsp_data = 0; core_rsp_data = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin for (i = 0; i < NUM_BANKS; i++) begin
if (found_bank if (found_bank
&& per_bank_core_rsp_valid[i] && per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]] && !core_rsp_valid[per_bank_core_rsp_tid[i]]
@@ -68,7 +68,7 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid = 0; core_rsp_valid = 0;
core_rsp_data = 0; core_rsp_data = 0;
core_rsp_tag = 0; core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin for (i = 0; i < NUM_BANKS; i++) begin
if (found_bank if (found_bank
&& per_bank_core_rsp_valid[i] && per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]] && !core_rsp_valid[per_bank_core_rsp_tid[i]]

View File

@@ -19,13 +19,13 @@ module VX_cache_dram_req_arb #(
// Fill Request // Fill Request
input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid, input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr, input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr,
output wire dfqq_full, output wire dram_fill_req_ready,
// Writeback Request // Writeback Request
input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid, input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr, input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr,
input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data, input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data,
output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop, output wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready,
// Merged Request // Merged Request
output wire dram_req_read, output wire dram_req_read,
@@ -70,6 +70,7 @@ module VX_cache_dram_req_arb #(
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
wire dfqq_push = (| per_bank_dram_fill_req_valid); wire dfqq_push = (| per_bank_dram_fill_req_valid);
wire dfqq_full;
VX_cache_dfq_queue #( VX_cache_dfq_queue #(
.BANK_LINE_SIZE(BANK_LINE_SIZE), .BANK_LINE_SIZE(BANK_LINE_SIZE),
@@ -100,7 +101,9 @@ module VX_cache_dram_req_arb #(
.found (dwb_valid) .found (dwb_valid)
); );
assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0; assign dram_fill_req_ready = ~dfqq_full;
assign per_bank_dram_wb_req_ready = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0;
wire dram_req_valid = dwb_valid || dfqq_req || pref_pop; wire dram_req_valid = dwb_valid || dfqq_req || pref_pop;

View File

@@ -66,7 +66,7 @@ module VX_cache_miss_resrv #(
reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready;
genvar i; genvar i;
generate generate
for (i = 0; i < MRVQ_SIZE; i=i+1) begin for (i = 0; i < MRVQ_SIZE; i++) begin
assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1); assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1);
end end
endgenerate endgenerate

116
hw/rtl/cache/VX_snp_forwarder.v vendored Normal file
View File

@@ -0,0 +1,116 @@
`include "VX_define.vh"
// Snoop forwarder.
//
// Accepts a snoop request, broadcasts it on all NUM_REQUESTS forwarding-out
// ports, and tracks it in a small table (pending_reqs / pending_cntrs) indexed
// by the forwarding tag (wr_ptr at the time of acceptance). Forwarding-in
// responses are sampled one port per cycle (fwdin_sel); each accepted response
// decrements the counter of the entry named by its tag. When the counter of
// that entry reads 1 at the time a response is taken, the stored
// {address, request tag} is presented on the snp_rsp_* outputs.
//
// Parameters:
//   BANK_LINE_SIZE    - not referenced directly in this module body
//                       (presumably consumed by macros such as
//                       `DRAM_ADDR_WIDTH; TODO confirm).
//   NUM_REQUESTS      - number of forwarding-out / forwarding-in ports.
//   SNRQ_SIZE         - number of entries in the pending table.
//   SNP_REQ_TAG_WIDTH - width of the incoming request tag (stored and returned).
//   SNP_FWD_TAG_WIDTH - width of the tag carried on the forwarding ports.
module VX_snp_forwarder #(
    parameter BANK_LINE_SIZE    = 0,
    parameter NUM_REQUESTS      = 0,
    parameter SNRQ_SIZE         = 0,
    parameter SNP_REQ_TAG_WIDTH = 0,
    parameter SNP_FWD_TAG_WIDTH = 0
) (
    input wire clk,
    input wire reset,

    // Snoop request
    input wire                          snp_req_valid,
    input wire [`DRAM_ADDR_WIDTH-1:0]   snp_req_addr,
    input wire [SNP_REQ_TAG_WIDTH-1:0]  snp_req_tag,
    output wire                         snp_req_ready,

    // Snoop response
    output wire                         snp_rsp_valid,
    output wire [`DRAM_ADDR_WIDTH-1:0]  snp_rsp_addr,
    output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
    input wire                          snp_rsp_ready,

    // Snoop Forwarding out
    output wire [NUM_REQUESTS-1:0]                          snp_fwdout_valid,
    output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0]    snp_fwdout_addr,
    output wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0]   snp_fwdout_tag,
    input wire [NUM_REQUESTS-1:0]                           snp_fwdout_ready,

    // Snoop forwarding in
    input wire [NUM_REQUESTS-1:0]                           snp_fwdin_valid,
    input wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0]    snp_fwdin_tag,
    output wire [NUM_REQUESTS-1:0]                          snp_fwdin_ready
);
    // Per-entry storage: original {address, tag} and outstanding-response counter.
    reg [`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH-1:0] pending_reqs [SNRQ_SIZE-1:0];
    reg [`REQS_BITS-1:0] pending_cntrs [SNRQ_SIZE-1:0];

    reg [`LOG2UP(SNRQ_SIZE)-1:0] rd_ptr, wr_ptr;
    reg [`LOG2UP(SNRQ_SIZE)-1:0] pending_size;

    // Index of the forwarding-in port sampled this cycle.
    reg [`REQS_BITS-1:0] fwdin_sel;

    wire enqueue, dequeue;

    wire fwdout_ready;
    wire fwdin_valid;
    wire [SNP_FWD_TAG_WIDTH-1:0] fwdin_tag;
    wire fwdin_ready;
    wire fwdin_taken;

    // A request is broadcast only when every forwarding-out port is ready.
    assign fwdout_ready = (& snp_fwdout_ready);

    // pending_size cannot represent SNRQ_SIZE, so SNRQ_SIZE-1 is treated as full.
    assign snp_req_ready = (pending_size != `LOG2UP(SNRQ_SIZE)'(SNRQ_SIZE-1)) // not full
                        && fwdout_ready;

    genvar i;

    for (i = 0; i < NUM_REQUESTS; i++) begin
        assign snp_fwdout_valid[i] = enqueue && fwdout_ready;
        assign snp_fwdout_addr[i]  = snp_req_addr;
        assign snp_fwdout_tag[i]   = wr_ptr; // table slot doubles as the forwarding tag
    end

    assign fwdin_ready = snp_rsp_ready;
    assign fwdin_taken = fwdin_valid && fwdin_ready;

    // The response is emitted in the cycle the last outstanding forward reply is taken.
    assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[fwdin_tag]); // send response
    assign {snp_rsp_addr, snp_rsp_tag} = pending_reqs[fwdin_tag];

    assign enqueue = snp_req_valid && snp_req_ready;

    // NOTE(review): an entry is only reclaimed when it completes while at rd_ptr;
    // an entry completing at any other slot does not appear to be dequeued by
    // this logic - confirm whether in-order completion is guaranteed upstream.
    assign dequeue = snp_rsp_valid && (rd_ptr == fwdin_tag);

    // Pending-table bookkeeping. fwdin_sel is intentionally NOT touched here:
    // it is owned by the dedicated always block below, so the register has a
    // single procedural driver.
    always @(posedge clk) begin
        if (reset) begin
            rd_ptr       <= 0;
            wr_ptr       <= 0;
            pending_size <= 0;
        end else begin
            if (enqueue) begin
                pending_reqs[wr_ptr]  <= {snp_req_addr, snp_req_tag};
                // NOTE(review): NUM_REQUESTS is truncated to `REQS_BITS bits here;
                // verify the counter width covers the intended count.
                pending_cntrs[wr_ptr] <= `REQS_BITS'(NUM_REQUESTS);
                wr_ptr <= wr_ptr + 1;
                if (!dequeue) begin
                    pending_size <= pending_size + 1;
                end
            end
            if (dequeue) begin
                rd_ptr <= rd_ptr + 1;
                if (!enqueue) begin
                    pending_size <= pending_size - 1;
                end
            end
            if (fwdin_taken) begin
                pending_cntrs[fwdin_tag] <= pending_cntrs[fwdin_tag] - 1;
            end
        end
    end

    // Forwarding-in port selector: advances by one every cycle.
    always @(posedge clk) begin
        if (reset) begin
            fwdin_sel <= 0;
        end else begin
            fwdin_sel <= fwdin_sel + 1;
        end
    end

    assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
    assign fwdin_tag   = snp_fwdin_tag[fwdin_sel];

    // Only the currently selected forwarding-in port is offered ready.
    for (i = 0; i < NUM_REQUESTS; i++) begin
        assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
    end

endmodule

View File

@@ -1,39 +0,0 @@
`include "VX_cache_config.vh"
// Snoop-forward arbiter: picks one bank with a pending snoop forward (via
// VX_generic_priority_encoder), drives its address on snp_fwd_addr and pulses
// that bank's pop. Bank valids are masked by snp_fwd_ready before selection.
module VX_snp_fwd_arb #(
    parameter NUM_BANKS      = 1,
    parameter BANK_LINE_SIZE = 1
) (
    input wire [NUM_BANKS-1:0]                          per_bank_snp_fwd_valid,
    input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0]    per_bank_snp_fwd_addr,
    output reg [NUM_BANKS-1:0]                          per_bank_snp_fwd_pop,

    output wire                         snp_fwd_valid,
    output wire [`DRAM_ADDR_WIDTH-1:0]  snp_fwd_addr,
    input wire                          snp_fwd_ready
);
    // Candidate banks: valid and downstream able to accept.
    wire [NUM_BANKS-1:0] fwd_candidates;
    assign fwd_candidates = per_bank_snp_fwd_valid & {NUM_BANKS{snp_fwd_ready}};

    wire [`BANK_BITS-1:0] sel_bank;
    wire                  sel_found;

    VX_generic_priority_encoder #(
        .N(NUM_BANKS)
    ) sel_ffsq (
        .valids (fwd_candidates),
        .index  (sel_bank),
        .found  (sel_found)
    );

    assign snp_fwd_addr  = per_bank_snp_fwd_addr[sel_bank];
    assign snp_fwd_valid = sel_found;

    // One-hot pop for the selected bank; all zeros when nothing was selected.
    always @(*) begin
        per_bank_snp_fwd_pop = 0;
        if (sel_found) begin
            per_bank_snp_fwd_pop[sel_bank] = 1;
        end
    end

endmodule

38
hw/rtl/cache/VX_snp_rsp_arb.v vendored Normal file
View File

@@ -0,0 +1,38 @@
`include "VX_cache_config.vh"
// Snoop-response arbiter: picks one bank with a pending snoop response (via
// VX_generic_priority_encoder), forwards its tag on snp_rsp_tag and raises
// ready for that bank only. Bank valids are masked by snp_rsp_ready before
// selection, so nothing is selected while downstream is not ready.
module VX_snp_rsp_arb #(
    parameter NUM_BANKS         = 0,
    parameter BANK_LINE_SIZE    = 0,
    parameter SNP_REQ_TAG_WIDTH = 0
) (
    input wire [NUM_BANKS-1:0]                          per_bank_snp_rsp_valid,
    input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0]   per_bank_snp_rsp_tag,
    output wire [NUM_BANKS-1:0]                         per_bank_snp_rsp_ready,

    output wire                         snp_rsp_valid,
    output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
    input wire                          snp_rsp_ready
);
    // Candidate banks: valid and downstream able to accept.
    wire [NUM_BANKS-1:0] rsp_candidates;
    assign rsp_candidates = per_bank_snp_rsp_valid & {NUM_BANKS{snp_rsp_ready}};

    wire [`BANK_BITS-1:0] sel_bank;
    wire                  sel_found;

    VX_generic_priority_encoder #(
        .N(NUM_BANKS)
    ) sel_ffsq (
        .valids (rsp_candidates),
        .index  (sel_bank),
        .found  (sel_found)
    );

    assign snp_rsp_tag   = per_bank_snp_rsp_tag[sel_bank];
    assign snp_rsp_valid = sel_found;

    // Ready is asserted only for the bank chosen by the encoder.
    genvar b;
    for (b = 0; b < NUM_BANKS; b++) begin
        assign per_bank_snp_rsp_ready[b] = sel_found && (sel_bank == `BANK_BITS'(b));
    end

endmodule

View File

@@ -110,7 +110,7 @@ module VX_tag_data_access #(
); );
genvar i; genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i = i + 1) begin for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #( VX_generic_register #(
.N( 1 + 1 + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) .N( 1 + 1 + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc ( ) s0_1_cc (
@@ -127,7 +127,7 @@ module VX_tag_data_access #(
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH]; assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH];
end end
@@ -144,7 +144,7 @@ module VX_tag_data_access #(
&& !miss_st1e && !miss_st1e
&& !is_snp_st1e; && !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000; assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end end
@@ -199,7 +199,7 @@ module VX_tag_data_access #(
assign readword_st1e = data_Qual; assign readword_st1e = data_Qual;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin for (i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = (block_offset == i[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill; wire normal_write = (block_offset == i[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill;
assign we[i] = (force_write) ? 4'b1111 : assign we[i] = (force_write) ? 4'b1111 :

View File

@@ -44,7 +44,7 @@ module VX_tag_data_structure #(
integer i; integer i;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (i = 0; i < `BANK_LINE_COUNT; i = i + 1) begin for (i = 0; i < `BANK_LINE_COUNT; i++) begin
valid[i] <= 0; valid[i] <= 0;
dirty[i] <= 0; dirty[i] <= 0;
end end
@@ -65,7 +65,7 @@ module VX_tag_data_structure #(
valid[write_addr] <= 0; valid[write_addr] <= 0;
end end
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin for (i = 0; i < `BANK_LINE_WORDS; i++) begin
if (write_enable[i][0]) data[write_addr][i][0] <= write_data[i * `WORD_WIDTH + 0 * `BYTE_WIDTH +: `BYTE_WIDTH]; if (write_enable[i][0]) data[write_addr][i][0] <= write_data[i * `WORD_WIDTH + 0 * `BYTE_WIDTH +: `BYTE_WIDTH];
if (write_enable[i][1]) data[write_addr][i][1] <= write_data[i * `WORD_WIDTH + 1 * `BYTE_WIDTH +: `BYTE_WIDTH]; if (write_enable[i][1]) data[write_addr][i][1] <= write_data[i * `WORD_WIDTH + 1 * `BYTE_WIDTH +: `BYTE_WIDTH];
if (write_enable[i][2]) data[write_addr][i][2] <= write_data[i * `WORD_WIDTH + 2 * `BYTE_WIDTH +: `BYTE_WIDTH]; if (write_enable[i][2]) data[write_addr][i][2] <= write_data[i * `WORD_WIDTH + 2 * `BYTE_WIDTH +: `BYTE_WIDTH];

View File

@@ -4,11 +4,13 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
interface VX_cache_snp_req_if #( interface VX_cache_snp_req_if #(
parameter DRAM_ADDR_WIDTH = 1 parameter DRAM_ADDR_WIDTH = 0,
parameter SNP_TAG_WIDTH = 0
) (); ) ();
wire snp_req_valid; wire snp_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr; wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr;
wire [SNP_TAG_WIDTH-1:0] snp_req_tag;
wire snp_req_ready; wire snp_req_ready;
endinterface endinterface

View File

@@ -0,0 +1,16 @@
// Include guard: prevents the interface below from being declared more than
// once when this file is included from several places.
`ifndef VX_CACHE_SNP_RSP_IF
`define VX_CACHE_SNP_RSP_IF
`include "../cache/VX_cache_config.vh"
// Signal bundle for a cache snoop response: a valid flag, a tag, and a ready
// flag. The interface declares no modports, so signal directions are not
// enforced here.
// NOTE(review): presumably the cache drives valid/tag and the snoop requester
// drives ready (valid/ready handshake) - confirm against the modules that
// connect to this interface.
interface VX_cache_snp_rsp_if #(
// Width, in bits, of snp_rsp_tag.
// NOTE(review): with the default of 0 the tag range below evaluates to
// [-1:0]; presumably every instantiation overrides this parameter - confirm.
parameter SNP_TAG_WIDTH = 0
) ();
// Response-valid flag.
wire snp_rsp_valid;
// Tag carried with the response; SNP_TAG_WIDTH bits wide.
// NOTE(review): presumably echoes the tag of the originating snoop request -
// confirm.
wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag;
// Response-ready flag.
wire snp_rsp_ready;
endinterface
`endif

View File

@@ -160,21 +160,29 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// align address to LLC block boundaries // align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE; auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
int outstanding_snp_reqs = 0;
// submit snoop requests for the needed blocks // submit snoop requests for the needed blocks
vortex_->snp_req_addr = aligned_addr_start; vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_valid = true; vortex_->snp_req_valid = true;
vortex_->snp_rsp_ready = true;
for (;;) { for (;;) {
this->step(); this->step();
if (vortex_->snp_rsp_valid) {
--outstanding_snp_reqs;
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) { if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
++outstanding_snp_reqs;
vortex_->snp_req_addr += 1; vortex_->snp_req_addr += 1;
if (vortex_->snp_req_addr >= aligned_addr_end) { if (vortex_->snp_req_addr >= aligned_addr_end) {
vortex_->snp_req_valid = false; vortex_->snp_req_valid = false;
break;
} }
} }
} if (!vortex_->snp_req_valid
this->wait(PIPELINE_FLUSH_LATENCY); && 0 == outstanding_snp_reqs) {
break;
}
}
} }
bool Simulator::run() { bool Simulator::run() {

View File

@@ -18,7 +18,6 @@
#define DRAM_LATENCY 100 #define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16 #define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16 #define DRAM_STALLS_MODULO 16
#define PIPELINE_FLUSH_LATENCY 1000
typedef struct { typedef struct {
int cycles_left; int cycles_left;