From def6a3569303136928dc260b05e0d7224e56b8d2 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 29 Nov 2020 15:04:31 -0800 Subject: [PATCH] shared memory optimization --- hw/opae/vortex_afu.sv | 4 +- hw/rtl/VX_cluster.v | 40 ++--- hw/rtl/VX_config.vh | 2 +- hw/rtl/VX_core.v | 12 +- hw/rtl/VX_mem_unit.v | 6 +- hw/rtl/Vortex.v | 20 +-- hw/rtl/cache/VX_bank.v | 308 +++++++++++++++++++------------- hw/rtl/cache/VX_cache.v | 12 +- hw/rtl/cache/VX_cache_config.vh | 2 +- hw/rtl/cache/VX_data_access.v | 62 +++---- hw/rtl/cache/VX_data_store.v | 21 ++- hw/rtl/cache/VX_snp_forwarder.v | 52 +++--- hw/rtl/cache/VX_tag_access.v | 97 +++++----- hw/scripts/scope.json | 4 +- 14 files changed, 351 insertions(+), 291 deletions(-) diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 54dacc12..50d3ac2a 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -114,7 +114,7 @@ wire vx_dram_rsp_ready; reg vx_snp_req_valid; reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; -wire vx_snp_req_invalidate = 0; +wire vx_snp_req_inv = 0; wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; wire vx_snp_req_ready; @@ -989,7 +989,7 @@ Vortex #() vortex ( // Snoop request .snp_req_valid (vx_snp_req_valid), .snp_req_addr (vx_snp_req_addr), - .snp_req_invalidate(vx_snp_req_invalidate), + .snp_req_inv (vx_snp_req_inv), .snp_req_tag (vx_snp_req_tag), .snp_req_ready (vx_snp_req_ready), diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index e2617515..34c3df00 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -27,7 +27,7 @@ module VX_cluster #( // Snoop request input wire snp_req_valid, input wire [`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [`L2SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -96,7 +96,7 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_snp_req_valid; wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; - wire [`NUM_CORES-1:0] per_core_snp_req_invalidate; + wire [`NUM_CORES-1:0] per_core_snp_req_inv; wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; wire [`NUM_CORES-1:0] per_core_snp_req_ready; @@ -165,7 +165,7 @@ module VX_cluster #( .snp_req_valid (per_core_snp_req_valid [i]), .snp_req_addr (per_core_snp_req_addr [i]), - .snp_req_invalidate (per_core_snp_req_invalidate[i]), + .snp_req_inv (per_core_snp_req_inv [i]), .snp_req_tag (per_core_snp_req_tag [i]), .snp_req_ready (per_core_snp_req_ready [i]), @@ -296,7 +296,7 @@ module VX_cluster #( wire[`NUM_CORES-1:0] core_snp_fwdout_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; - wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate; + wire[`NUM_CORES-1:0] core_snp_fwdout_inv; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; wire[`NUM_CORES-1:0] core_snp_fwdout_ready; @@ -306,7 +306,7 @@ module VX_cluster #( wire snp_fwd_rsp_valid; wire [`L2DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_invalidate; + wire snp_fwd_rsp_inv; wire [`L2SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; wire snp_fwd_rsp_ready; @@ -367,7 +367,7 @@ module VX_cluster #( assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)]; assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_inv [(i/2)] = core_snp_fwdout_inv [(i/2)]; assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)]; assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; @@ -391,19 +391,19 @@ module VX_cluster #( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), - .snp_req_invalidate (snp_req_invalidate), + .snp_req_inv (snp_req_inv), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), .snp_rsp_valid (snp_fwd_rsp_valid), .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_invalidate (snp_fwd_rsp_invalidate), + .snp_rsp_inv (snp_fwd_rsp_inv), .snp_rsp_tag (snp_fwd_rsp_tag), .snp_rsp_ready (snp_fwd_rsp_ready), .snp_fwdout_valid (core_snp_fwdout_valid), .snp_fwdout_addr (core_snp_fwdout_addr), - .snp_fwdout_invalidate(core_snp_fwdout_invalidate), + .snp_fwdout_inv (core_snp_fwdout_inv), .snp_fwdout_tag (core_snp_fwdout_tag), .snp_fwdout_ready (core_snp_fwdout_ready), @@ -472,7 +472,7 @@ module VX_cluster #( // Snoop request .snp_req_valid (snp_fwd_rsp_valid), .snp_req_addr (snp_fwd_rsp_addr), - .snp_req_invalidate (snp_fwd_rsp_invalidate), + .snp_req_inv (snp_fwd_rsp_inv), .snp_req_tag (snp_fwd_rsp_tag), .snp_req_ready (snp_fwd_rsp_ready), @@ -502,7 +502,7 @@ module VX_cluster #( wire[`NUM_CORES-1:0] core_snp_fwdout_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] core_snp_fwdout_addr; - wire[`NUM_CORES-1:0] core_snp_fwdout_invalidate; + wire[`NUM_CORES-1:0] core_snp_fwdout_inv; wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] core_snp_fwdout_tag; wire[`NUM_CORES-1:0] core_snp_fwdout_ready; @@ -546,7 +546,7 @@ module VX_cluster #( assign per_core_snp_req_valid [(i/2)] = core_snp_fwdout_valid [(i/2)]; assign per_core_snp_req_addr [(i/2)] = core_snp_fwdout_addr [(i/2)]; - assign per_core_snp_req_invalidate [(i/2)] = core_snp_fwdout_invalidate [(i/2)]; + assign per_core_snp_req_inv [(i/2)] = core_snp_fwdout_inv [(i/2)]; assign per_core_snp_req_tag [(i/2)] = core_snp_fwdout_tag [(i/2)]; assign core_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)]; @@ -569,19 +569,19 @@ module VX_cluster #( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), - .snp_req_invalidate (snp_req_invalidate), + .snp_req_inv (snp_req_inv), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), .snp_rsp_valid (snp_rsp_valid), `UNUSED_PIN (snp_rsp_addr), - `UNUSED_PIN (snp_rsp_invalidate), + `UNUSED_PIN (snp_rsp_inv), .snp_rsp_tag (snp_rsp_tag), .snp_rsp_ready (snp_rsp_ready), .snp_fwdout_valid (core_snp_fwdout_valid), .snp_fwdout_addr (core_snp_fwdout_addr), - .snp_fwdout_invalidate(core_snp_fwdout_invalidate), + .snp_fwdout_inv (core_snp_fwdout_inv), .snp_fwdout_tag (core_snp_fwdout_tag), .snp_fwdout_ready (core_snp_fwdout_ready), @@ -590,11 +590,11 @@ module VX_cluster #( .snp_fwdin_ready (core_snp_fwdin_ready) ); end else begin - assign core_snp_fwdout_valid = snp_req_valid; - assign core_snp_fwdout_addr = snp_req_addr; - assign core_snp_fwdout_invalidate = snp_req_invalidate; - assign core_snp_fwdout_tag = snp_req_tag; - assign snp_req_ready = core_snp_fwdout_ready; + assign core_snp_fwdout_valid= snp_req_valid; + assign core_snp_fwdout_addr = snp_req_addr; + assign core_snp_fwdout_inv = snp_req_inv; + assign core_snp_fwdout_tag = snp_req_tag; + assign snp_req_ready = core_snp_fwdout_ready; assign snp_rsp_valid = core_snp_fwdin_valid; assign snp_rsp_tag = core_snp_fwdin_tag; diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index b35402dd..98a3cc5d 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -251,7 +251,7 @@ // Miss Handling Register Size `ifndef IMSHR_SIZE -`define IMSHR_SIZE 4 +`define IMSHR_SIZE `MAX(`NUM_WARPS, 4) `endif // DRAM Request Queue Size diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index cfff5700..38efbafd 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -42,7 +42,7 @@ module VX_core #( // Snoop request input wire snp_req_valid, input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -238,11 +238,11 @@ module VX_core #( .SNP_TAG_WIDTH(`DSNP_TAG_WIDTH) ) dcache_snp_rsp_if(); - assign dcache_snp_req_if.valid = snp_req_valid; - assign dcache_snp_req_if.addr = snp_req_addr; - assign dcache_snp_req_if.invalidate = snp_req_invalidate; - assign dcache_snp_req_if.tag = snp_req_tag; - assign snp_req_ready = dcache_snp_req_if.ready; + assign dcache_snp_req_if.valid = snp_req_valid; + assign dcache_snp_req_if.addr = snp_req_addr; + assign dcache_snp_req_if.invalidate = snp_req_inv; + assign dcache_snp_req_if.tag = snp_req_tag; + assign snp_req_ready = dcache_snp_req_if.ready; assign snp_rsp_valid = dcache_snp_rsp_if.valid; assign snp_rsp_tag = dcache_snp_rsp_if.tag; diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index b3dc43e2..2ae31ba0 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -117,7 +117,7 @@ module VX_mem_unit # ( // Snoop request .snp_req_valid (1'b0), .snp_req_addr (0), - .snp_req_invalidate (0), + .snp_req_inv (0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), @@ -190,7 +190,7 @@ module VX_mem_unit # ( // Snoop request .snp_req_valid (dcache_snp_req_if.valid), .snp_req_addr (dcache_snp_req_if.addr), - .snp_req_invalidate (dcache_snp_req_if.invalidate), + .snp_req_inv (dcache_snp_req_if.invalidate), .snp_req_tag (dcache_snp_req_if.tag), .snp_req_ready (dcache_snp_req_if.ready), @@ -262,7 +262,7 @@ module VX_mem_unit # ( // Snoop request .snp_req_valid (1'b0), .snp_req_addr (0), - .snp_req_invalidate (1'b0), + .snp_req_inv (1'b0), .snp_req_tag (0), `UNUSED_PIN (snp_req_ready), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index e9458ed9..a6bed5d4 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -25,7 +25,7 @@ module Vortex ( // Snoop request input wire snp_req_valid, input wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [`VX_SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -91,7 +91,7 @@ module Vortex ( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), - .snp_req_invalidate (snp_req_invalidate), + .snp_req_inv (snp_req_inv), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), @@ -144,7 +144,7 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; - wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_inv; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; @@ -205,7 +205,7 @@ module Vortex ( .snp_req_valid (per_cluster_snp_req_valid [i]), .snp_req_addr (per_cluster_snp_req_addr [i]), - .snp_req_invalidate (per_cluster_snp_req_invalidate[i]), + .snp_req_inv (per_cluster_snp_req_inv [i]), .snp_req_tag (per_cluster_snp_req_tag [i]), .snp_req_ready (per_cluster_snp_req_ready [i]), @@ -327,7 +327,7 @@ module Vortex ( wire snp_fwd_rsp_valid; wire [`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_rsp_addr; - wire snp_fwd_rsp_invalidate; + wire snp_fwd_rsp_inv; wire [`L3SNP_TAG_WIDTH-1:0] snp_fwd_rsp_tag; wire snp_fwd_rsp_ready; @@ -364,19 +364,19 @@ module Vortex ( .snp_req_valid (snp_req_valid), .snp_req_addr (snp_req_addr), - .snp_req_invalidate (snp_req_invalidate), + .snp_req_inv (snp_req_inv), .snp_req_tag (snp_req_tag), .snp_req_ready (snp_req_ready), .snp_rsp_valid (snp_fwd_rsp_valid), .snp_rsp_addr (snp_fwd_rsp_addr), - .snp_rsp_invalidate (snp_fwd_rsp_invalidate), + .snp_rsp_inv (snp_fwd_rsp_inv), .snp_rsp_tag (snp_fwd_rsp_tag), .snp_rsp_ready (snp_fwd_rsp_ready), .snp_fwdout_valid (per_cluster_snp_req_valid), .snp_fwdout_addr (per_cluster_snp_req_addr), - .snp_fwdout_invalidate(per_cluster_snp_req_invalidate), + .snp_fwdout_inv (per_cluster_snp_req_inv), .snp_fwdout_tag (per_cluster_snp_req_tag), .snp_fwdout_ready (per_cluster_snp_req_ready), @@ -445,7 +445,7 @@ module Vortex ( // Snoop request .snp_req_valid (snp_fwd_rsp_valid), .snp_req_addr (snp_fwd_rsp_addr), - .snp_req_invalidate (snp_fwd_rsp_invalidate), + .snp_req_inv (snp_fwd_rsp_inv), .snp_req_tag (snp_fwd_rsp_tag), .snp_req_ready (snp_fwd_rsp_ready), @@ -474,7 +474,7 @@ module Vortex ( `SCOPE_ASSIGN (snp_req_fire, snp_req_valid && snp_req_ready); `SCOPE_ASSIGN (snp_req_addr, `TO_FULL_ADDR(snp_req_addr)); - `SCOPE_ASSIGN (snp_req_invalidate, snp_req_invalidate); + `SCOPE_ASSIGN (snp_req_inv, snp_req_inv); `SCOPE_ASSIGN (snp_req_tag, snp_req_tag); `SCOPE_ASSIGN (snp_rsp_fire, snp_rsp_valid && snp_rsp_ready); diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index e5dd51a0..e1fa64e0 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -87,7 +87,7 @@ module VX_bank #( // Snoop Request input wire snp_req_valid, input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -99,6 +99,7 @@ module VX_bank #( // Misses output wire misses ); + `STATIC_ASSERT (!FLUSH_ENABLE || DRAM_ENABLE, ("invalid parameter")) `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ @@ -138,40 +139,57 @@ module VX_bank #( wire snrq_pop; wire snrq_empty; - wire snrq_full; - + wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0; - wire snrq_invalidate_st0; - wire [SNP_TAG_WIDTH-1:0] snrq_tag_st0; + wire snrq_inv_st0; + wire [SNP_TAG_WIDTH-1:0] snrq_tag_st0; - wire snp_req_fire = snp_req_valid && snp_req_ready; - assign snp_req_ready = !snrq_full; + if (FLUSH_ENABLE) begin - VX_generic_queue #( - .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), - .SIZE(SNRQ_SIZE) - ) snp_req_queue ( - .clk (clk), - .reset (reset), - .push (snp_req_fire), - .pop (snrq_pop), - .data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}), - .data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}), - .empty (snrq_empty), - .full (snrq_full), - `UNUSED_PIN (size) - ); + wire snrq_full; + assign snp_req_ready = !snrq_full; + wire snp_req_fire = snp_req_valid && snp_req_ready; + + VX_generic_queue #( + .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), + .SIZE(SNRQ_SIZE) + ) snp_req_queue ( + .clk (clk), + .reset (reset), + .push (snp_req_fire), + .pop (snrq_pop), + .data_in ({snp_req_addr, snp_req_inv, snp_req_tag}), + .data_out({snrq_addr_st0, snrq_inv_st0, snrq_tag_st0}), + .empty (snrq_empty), + .full (snrq_full), + `UNUSED_PIN (size) + ); + + end else begin + `UNUSED_VAR (snp_req_valid) + `UNUSED_VAR (snp_req_addr) + `UNUSED_VAR (snp_req_inv) + `UNUSED_VAR (snp_req_tag) + assign snrq_empty = 1; + assign snrq_addr_st0 = 0; + assign snrq_inv_st0 = 0; + assign snrq_tag_st0 = 0; + assign snp_req_ready = 0; + end wire dfpq_pop; wire dfpq_empty; - wire dfpq_full; + wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready; - assign dram_rsp_ready = !dfpq_full; - + if (DRAM_ENABLE) begin + + wire dfpq_full; + assign dram_rsp_ready = !dfpq_full; + VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), .SIZE(DRFQ_SIZE) @@ -191,9 +209,9 @@ module VX_bank #( `UNUSED_VAR (dram_rsp_addr) `UNUSED_VAR (dram_rsp_data) assign dfpq_empty = 1; - assign dfpq_full = 0; assign dfpq_addr_st0 = 0; - assign dfpq_filldata_st0 = 0; + assign dfpq_filldata_st0 = 0; + assign dram_rsp_ready = 0; end wire reqq_pop; @@ -256,7 +274,7 @@ module VX_bank #( wire mshr_rw_st0; wire [WORD_SIZE-1:0] mshr_byteen_st0; wire mshr_is_snp_st0; - wire mshr_snp_invalidate_st0; + wire mshr_snp_inv_st0; wire is_mshr_miss_st2; wire is_mshr_miss_st3; @@ -266,8 +284,6 @@ module VX_bank #( wire snpq_push_stall; wire pipeline_stall; - wire is_fill_st1; - // determine which queue to pop next in piority order wire mshr_pop_unqual = mshr_valid_st0; wire dfpq_pop_unqual = !mshr_pop_unqual && !dfpq_empty; @@ -280,35 +296,66 @@ module VX_bank #( assign reqq_pop = reqq_pop_unqual && !pipeline_stall; assign snrq_pop = snrq_pop_unqual && !pipeline_stall; - wire is_fill_st0; - wire valid_st0; - wire [`LINE_ADDR_WIDTH-1:0] addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0; - wire is_mshr_st0; - - wire [`WORD_WIDTH-1:0] writeword_st0; - wire [`BANK_LINE_WIDTH-1:0] writedata_st0; - wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st0; - wire is_snp_st0; - wire snp_invalidate_st0; - wire mshr_pending_hazard_unqual_st0; + wire is_fill_st0; + wire is_mshr_st0; + wire is_snp_st0; + wire valid_st0; + wire [`LINE_ADDR_WIDTH-1:0] addr_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st0; + wire [`WORD_WIDTH-1:0] writeword_st0; + wire [`BANK_LINE_WIDTH-1:0] writedata_st0; + wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st0; + wire snp_inv_st0; + wire mshr_pending_hazard_unqual_st0; - wire valid_st1; - wire [`LINE_ADDR_WIDTH-1:0] addr_st1; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; - wire [`WORD_WIDTH-1:0] writeword_st1; - wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1; - wire [`BANK_LINE_WIDTH-1:0] writedata_st1; - wire is_snp_st1; - wire snp_invalidate_st1; - wire is_mshr_st1; - wire mshr_pending_hazard_st1; - wire miss_st3; - wire force_miss_st3; - wire [`LINE_ADDR_WIDTH-1:0] addr_st3; + wire is_fill_st1; + wire is_mshr_st1; + wire is_snp_st1; + wire valid_st1; + wire [`LINE_ADDR_WIDTH-1:0] addr_st1; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; + wire [`WORD_WIDTH-1:0] writeword_st1; + wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1; + wire [`BANK_LINE_WIDTH-1:0] writedata_st1; + wire snp_inv_st1; + + wire [`TAG_SELECT_BITS-1:0] readtag_st1; + wire miss_st1; + wire force_miss_st1; + wire dirty_st1; + wire [WORD_SIZE-1:0] mem_byteen_st1; + wire writeen_st1; + wire mem_rw_st1; +`DEBUG_BEGIN + wire [`REQ_TAG_WIDTH-1:0] tag_st1; + wire [`REQS_BITS-1:0] tid_st1; +`DEBUG_END + + wire valid_st2; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; + wire [`WORD_WIDTH-1:0] writeword_st2; + wire [`WORD_WIDTH-1:0] readword_st2; + wire [`BANK_LINE_WIDTH-1:0] readdata_st2; + wire [`BANK_LINE_WIDTH-1:0] writedata_st2; + wire [WORD_SIZE-1:0] mem_byteen_st2; + wire dirty_st2; + wire [BANK_LINE_SIZE-1:0] dirtyb_st2; + wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; + wire [`TAG_SELECT_BITS-1:0] readtag_st2; + wire is_fill_st2; + wire is_snp_st2; + wire snp_inv_st2; + wire is_mshr_st2; + wire miss_st2; + wire force_miss_st2; + wire[`LINE_ADDR_WIDTH-1:0] addr_st2; + wire writeen_st2; + + wire miss_st3; + wire force_miss_st3; + wire [`LINE_ADDR_WIDTH-1:0] addr_st3; assign is_mshr_st0 = mshr_pop_unqual; - assign is_fill_st0 = dfpq_pop_unqual; assign valid_st0 = dfpq_pop || mshr_pop || reqq_pop || snrq_pop; @@ -339,17 +386,13 @@ module VX_bank #( snrq_pop_unqual ? 1 : 0; - assign snp_invalidate_st0 = mshr_pop_unqual ? mshr_snp_invalidate_st0 : - snrq_pop_unqual ? snrq_invalidate_st0 : - 0; + assign snp_inv_st0 = mshr_pop_unqual ? mshr_snp_inv_st0 : + snrq_pop_unqual ? snrq_inv_st0 : + 0; assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : reqq_pop_unqual ? reqq_writeword_st0 : - 0; - - // we have a miss in msrq or in stage 3 for the current address - wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); + 0; `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -359,6 +402,14 @@ module VX_bank #( end `endif +if (DRAM_ENABLE) begin + + wire mshr_pending_hazard_st1; + + // we have a miss in msrq or in stage 3 for the current address + wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 + || ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); + VX_generic_register #( .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) ) pipe_reg0 ( @@ -366,8 +417,8 @@ module VX_bank #( .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st0, is_snp_st0, snp_invalidate_st0, mshr_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), - .out ({is_mshr_st1, is_snp_st1, snp_invalidate_st1, mshr_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) + .in ({is_mshr_st0, is_snp_st0, snp_inv_st0, mshr_pending_hazard_st0, valid_st0, addr_st0, wsel_st0, writeword_st0, inst_meta_st0, is_fill_st0, writedata_st0}), + .out ({is_mshr_st1, is_snp_st1, snp_inv_st1, mshr_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); `ifdef DBG_CACHE_REQ_INFO @@ -378,21 +429,6 @@ module VX_bank #( end `endif - wire[`TAG_SELECT_BITS-1:0] readtag_st1; - wire writeen_st1; - wire writeen_st2; - wire miss_st1; - wire miss_st2; - wire dirty_st1; - wire mem_rw_st1; - wire [WORD_SIZE-1:0] mem_byteen_st1; - wire force_miss_st2; - wire[`LINE_ADDR_WIDTH-1:0] addr_st2; -`DEBUG_BEGIN - wire [`REQ_TAG_WIDTH-1:0] tag_st1; - wire [`REQS_BITS-1:0] tid_st1; -`DEBUG_END - assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; // we have a matching previous request that missed alreedy @@ -401,9 +437,9 @@ module VX_bank #( // force miss to ensure commit order when a new request has pending previous requests to same block // also force a miss for msrq requests when previous requests got a miss - wire force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1 - && (mshr_pending_hazard_st1 || st2_pending_hazard_st1 || st3_pending_hazard_st1)) - || (valid_st1 && is_mshr_st1 && is_mshr_miss_st2); + assign force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1 + && (mshr_pending_hazard_st1 || st2_pending_hazard_st1 || st3_pending_hazard_st1)) + || (valid_st1 && is_mshr_st1 && is_mshr_miss_st2); VX_tag_access #( .BANK_ID (BANK_ID), @@ -412,9 +448,9 @@ module VX_bank #( .CACHE_SIZE (CACHE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE), - .DRAM_ENABLE (DRAM_ENABLE), - .WRITE_ENABLE (WRITE_ENABLE) + .WORD_SIZE (WORD_SIZE), + .WRITE_ENABLE (WRITE_ENABLE), + .FLUSH_ENABLE (FLUSH_ENABLE) ) tag_access ( .clk (clk), .reset (reset), @@ -434,7 +470,7 @@ module VX_bank #( .is_write_in (mem_rw_st1), .is_fill_in (is_fill_st1), .is_snp_in (is_snp_st1), - .snp_invalidate_in(snp_invalidate_st1), + .snp_inv_in (snp_inv_st1), .force_miss_in (force_miss_st1), // Outputs @@ -446,22 +482,6 @@ module VX_bank #( assign misses = miss_st1; - wire valid_st2; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; - wire [`WORD_WIDTH-1:0] writeword_st2; - wire [`WORD_WIDTH-1:0] readword_st2; - wire [`BANK_LINE_WIDTH-1:0] readdata_st2; - wire [`BANK_LINE_WIDTH-1:0] writedata_st2; - wire [WORD_SIZE-1:0] mem_byteen_st2; - wire dirty_st2; - wire [BANK_LINE_SIZE-1:0] dirtyb_st2; - wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2; - wire [`TAG_SELECT_BITS-1:0] readtag_st2; - wire is_fill_st2; - wire is_snp_st2; - wire snp_invalidate_st2; - wire is_mshr_st2; - VX_generic_register #( .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH) ) pipe_reg1 ( @@ -469,9 +489,53 @@ module VX_bank #( .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_invalidate_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), - .out ({is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) - ); + .in ({is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), + .out ({is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) + ); + +end else begin + + `UNUSED_VAR (mshr_pending_hazard_unqual_st0) + + assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; + + assign is_fill_st1 = is_fill_st0; + assign is_mshr_st1 = is_mshr_st0; + assign is_snp_st1 = is_snp_st0; + assign valid_st1 = valid_st0; + assign wsel_st1 = wsel_st0; + assign writeword_st1= writeword_st0; + assign writedata_st1= writedata_st0; + assign inst_meta_st1= inst_meta_st0; + assign snp_inv_st1 = snp_inv_st0; + assign addr_st1 = addr_st0; + assign mem_byteen_st1 = 0; + assign dirty_st1 = 0; + assign readtag_st1 = 0; + assign miss_st1 = 0; + assign writeen_st1 = valid_st1 && mem_rw_st1; + assign force_miss_st1 = 0; + + assign is_fill_st2 = is_fill_st1; + assign is_mshr_st2 = is_mshr_st1; + assign is_snp_st2 = is_snp_st1; + assign valid_st2 = valid_st1; + assign wsel_st2 = wsel_st1; + assign writeword_st2= writeword_st1; + assign writedata_st2= writedata_st1; + assign inst_meta_st2= inst_meta_st1; + assign snp_inv_st2 = snp_inv_st1; + assign addr_st2 = addr_st1; + assign mem_byteen_st2 = mem_byteen_st1; + assign dirty_st2 = dirty_st1; + assign readtag_st2 = readtag_st1; + assign miss_st2 = miss_st1; + assign writeen_st2 = writeen_st1; + assign force_miss_st2 = force_miss_st1; + + assign misses = 0; + +end `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -491,7 +555,6 @@ module VX_bank #( .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .DRAM_ENABLE (DRAM_ENABLE), .WRITE_ENABLE (WRITE_ENABLE) ) data_access ( .clk (clk), @@ -531,7 +594,7 @@ module VX_bank #( wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st3; wire [`TAG_SELECT_BITS-1:0] readtag_st3; wire is_snp_st3; - wire snp_invalidate_st3; + wire snp_inv_st3; wire is_mshr_st3; wire send_core_rsp_st3; wire send_dwb_req_st3; @@ -569,8 +632,8 @@ module VX_bank #( .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({is_mshr_st2, incoming_fill_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), - .out ({is_mshr_st3, incoming_fill_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) + .in ({is_mshr_st2, incoming_fill_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, force_miss_st2, is_snp_st2, snp_inv_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), + .out ({is_mshr_st3, incoming_fill_st3, send_core_rsp_st3, send_dwb_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_inv_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) ); `ifdef DBG_CACHE_REQ_INFO @@ -647,7 +710,7 @@ module VX_bank #( .enqueue_rw_st3 (req_rw_st3), .enqueue_byteen_st3 (req_byteen_st3), .enqueue_is_snp_st3 (is_snp_st3), - .enqueue_snp_inv_st3(snp_invalidate_st3), + .enqueue_snp_inv_st3(snp_inv_st3), .enqueue_mshr_st3 (is_mshr_st3), .enqueue_ready_st3 (mshr_init_ready_state_st3), .enqueue_full (mshr_full), @@ -669,7 +732,7 @@ module VX_bank #( .dequeue_rw_st0 (mshr_rw_st0), .dequeue_byteen_st0 (mshr_byteen_st0), .dequeue_is_snp_st0 (mshr_is_snp_st0), - .dequeue_snp_inv_st0(mshr_snp_invalidate_st0), + .dequeue_snp_inv_st0(mshr_snp_inv_st0), .dequeue_st3 (mshr_dequeue_st3) ); end else begin @@ -677,7 +740,7 @@ module VX_bank #( `UNUSED_VAR (mshr_push) `UNUSED_VAR (wsel_st3) `UNUSED_VAR (writeword_st3) - `UNUSED_VAR (snp_invalidate_st3) + `UNUSED_VAR (snp_inv_st3) `UNUSED_VAR (req_byteen_st3) `UNUSED_VAR (is_snp_st3) `UNUSED_VAR (incoming_fill_st3) @@ -693,7 +756,7 @@ module VX_bank #( assign mshr_rw_st0 = 0; assign mshr_byteen_st0 = 0; assign mshr_is_snp_st0 = 0; - assign mshr_snp_invalidate_st0 = 0; + assign mshr_snp_inv_st0 = 0; end // Enqueue core response @@ -777,9 +840,9 @@ module VX_bank #( `UNUSED_VAR (dirtyb_st3) `UNUSED_VAR (readdata_st3) `UNUSED_VAR (dram_req_ready) - assign dwbq_empty = 1; - assign dwbq_full = 0; - assign dram_req_rw = 0; + assign dwbq_empty = 1; + assign dwbq_full = 0; + assign dram_req_rw = 0; assign dram_req_byteen = 0; assign dram_req_addr = 0; assign dram_req_data = 0; @@ -824,10 +887,10 @@ module VX_bank #( `UNUSED_VAR (snpq_push) `UNUSED_VAR (snpq_pop) `UNUSED_VAR (snpq_tag_st3) - assign snpq_empty = 1; - assign snpq_full = 0; - assign snp_rsp_tag = 0; `UNUSED_VAR (snp_rsp_ready) + assign snpq_empty = 1; + assign snpq_full = 0; + assign snp_rsp_tag = 0; end assign snp_rsp_valid = !snpq_empty @@ -844,7 +907,8 @@ module VX_bank #( `SCOPE_ASSIGN (valid_st2, valid_st2); `SCOPE_ASSIGN (valid_st3, valid_st3); - `SCOPE_ASSIGN (is_mshr_st1, is_mshr_st1); + `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); + `SCOPE_ASSIGN (miss_st1, miss_st1); `SCOPE_ASSIGN (dirty_st1, dirty_st1); `SCOPE_ASSIGN (force_miss_st1, force_miss_st1); @@ -875,7 +939,7 @@ module VX_bank #( $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin - $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0); + $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_inv_st0); end if (cwbq_push) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_tid_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 3c2b9df1..40919cd8 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -89,7 +89,7 @@ module VX_cache #( // Snoop request input wire snp_req_valid, input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, @@ -186,7 +186,7 @@ module VX_cache #( wire curr_bank_snp_req_valid; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; - wire curr_bank_snp_req_invalidate; + wire curr_bank_snp_req_inv; wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag; wire curr_bank_snp_req_ready; @@ -243,9 +243,9 @@ module VX_cache #( assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i); assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr); end - assign curr_bank_snp_req_invalidate = snp_req_invalidate; - assign curr_bank_snp_req_tag = snp_req_tag; - assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; + assign curr_bank_snp_req_inv = snp_req_inv; + assign curr_bank_snp_req_tag = snp_req_tag; + assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready; // Snoop response assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid; @@ -314,7 +314,7 @@ module VX_cache #( // Snoop request .snp_req_valid (curr_bank_snp_req_valid), .snp_req_addr (curr_bank_snp_req_addr), - .snp_req_invalidate (curr_bank_snp_req_invalidate), + .snp_req_inv (curr_bank_snp_req_inv), .snp_req_tag (curr_bank_snp_req_tag), .snp_req_ready (curr_bank_snp_req_ready), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 909753ce..43edb1a7 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -14,7 +14,7 @@ // tag rw byteen tid `define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) -// data metadata word_sel is_snp snp_invalidate +// data metadata word_sel is_snp snp_inv `define MSHR_METADATA_WIDTH (`WORD_WIDTH + `REQ_INST_META_WIDTH + `UP(`WORD_SELECT_WIDTH) + 1 + 1) `define BANK_BITS `LOG2UP(NUM_BANKS) diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index ed8b84ef..9ab9c0de 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -16,9 +16,6 @@ module VX_data_access #( // Enable cache writeable parameter WRITE_ENABLE = 0, - // Enable dram update - parameter DRAM_ENABLE = 0, - // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 ) ( @@ -54,14 +51,12 @@ module VX_data_access #( output wire[BANK_LINE_SIZE-1:0] dirtyb_out ); - wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_out; - wire[`BANK_LINE_WIDTH-1:0] qual_read_data; + wire[BANK_LINE_SIZE-1:0] read_dirtyb_out; + wire[`BANK_LINE_WIDTH-1:0] read_data; - wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_out; - wire[`BANK_LINE_WIDTH-1:0] use_read_data; - wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_byte_enable; - wire[`BANK_LINE_WIDTH-1:0] use_write_data; - wire use_write_enable; + wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable; + wire write_enable; + wire[`BANK_LINE_WIDTH-1:0] write_data; wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; @@ -69,68 +64,63 @@ module VX_data_access #( .CACHE_SIZE (CACHE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), - .WORD_SIZE (WORD_SIZE) + .WORD_SIZE (WORD_SIZE), + .WRITE_ENABLE (WRITE_ENABLE) ) data_store ( .clk (clk), .reset (reset), .read_addr (addrline), - .read_dirtyb (qual_read_dirtyb_out), - .read_data (qual_read_data), + .read_dirtyb (read_dirtyb_out), + .read_data (read_data), - .write_enable(use_write_enable), + .write_enable(write_enable), .write_fill (is_fill_in), - .byte_enable (use_byte_enable), + .byte_enable (byte_enable), .write_addr (addrline), - .write_data (use_write_data) + .write_data (write_data) ); - - assign use_read_dirtyb_out = qual_read_dirtyb_out; - assign use_read_data = qual_read_data; if (`WORD_SELECT_WIDTH != 0) begin - wire [`WORD_WIDTH-1:0] readword = use_read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] readword = read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH]; for (genvar i = 0; i < WORD_SIZE; i++) begin assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_in[i]}}; end end else begin for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_out[i * 8 +: 8] = use_read_data[i * 8 +: 8] & {8{byteen_in[i]}}; + assign readword_out[i * 8 +: 8] = read_data[i * 8 +: 8] & {8{byteen_in[i]}}; end end - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable; - wire [`BANK_LINE_WIDTH-1:0] data_write; - for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - wire word_sel = ((`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i))); + wire word_sel = (`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i)); assign byte_enable[i] = is_fill_in ? {WORD_SIZE{1'b1}} : - word_sel ? byteen_in : - {WORD_SIZE{1'b0}}; + word_sel ? byteen_in : + {WORD_SIZE{1'b0}}; - assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in; + assign write_data[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in; end - assign use_write_enable = valid_in && writeen_in && !stall; - assign use_byte_enable = byte_enable; - assign use_write_data = data_write; + assign write_enable = valid_in + && writeen_in + && !stall; - assign dirtyb_out = use_read_dirtyb_out; - assign readdata_out = use_read_data; + assign dirtyb_out = read_dirtyb_out; + assign readdata_out = read_data; `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin if (valid_in && !stall) begin - if (use_write_enable) begin + if (write_enable) begin if (is_fill_in) begin - $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data); + $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, write_data); end else begin $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, byte_enable, dirtyb_out, addrline, wordsel_in, writeword_in); end end else begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data); + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, read_data); end end end diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index ac91dd69..53253bbb 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -8,7 +8,10 @@ module VX_data_store #( // Number of banks parameter NUM_BANKS = 1, // Size of a word in bytes - parameter WORD_SIZE = 1 + parameter WORD_SIZE = 1, + + // Enable cache writeable + parameter WRITE_ENABLE = 0 ) ( input wire clk, input wire reset, @@ -25,13 +28,19 @@ module VX_data_store #( ); `UNUSED_VAR (reset) - reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; - always @(posedge clk) begin - if (write_enable) begin - dirtyb[write_addr] <= write_fill ? 0 : (dirtyb[write_addr] | byte_enable); + if (WRITE_ENABLE) begin + reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0]; + always @(posedge clk) begin + if (write_enable) begin + dirtyb[write_addr] <= write_fill ? 0 : (dirtyb[write_addr] | byte_enable); + end end + assign read_dirtyb = dirtyb [read_addr]; + end else begin + `UNUSED_VAR (write_fill) + `UNUSED_VAR (byte_enable) + assign read_dirtyb = 0; end - assign read_dirtyb = dirtyb [read_addr]; VX_dp_ram #( .DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8), diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index 80e39cd2..985e0f71 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -14,21 +14,21 @@ module VX_snp_forwarder #( // Snoop request input wire snp_req_valid, input wire [SRC_ADDR_WIDTH-1:0] snp_req_addr, - input wire snp_req_invalidate, + input wire snp_req_inv, input wire [SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, // Snoop response output wire snp_rsp_valid, output wire [SRC_ADDR_WIDTH-1:0] snp_rsp_addr, - output wire snp_rsp_invalidate, + output wire snp_rsp_inv, output wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready, // Snoop Forwarding out output wire [NUM_REQUESTS-1:0] snp_fwdout_valid, output wire [NUM_REQUESTS-1:0][DST_ADDR_WIDTH-1:0] snp_fwdout_addr, - output wire [NUM_REQUESTS-1:0] snp_fwdout_invalidate, + output wire [NUM_REQUESTS-1:0] snp_fwdout_inv, output wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdout_tag, input wire [NUM_REQUESTS-1:0] snp_fwdout_ready, @@ -70,8 +70,8 @@ module VX_snp_forwarder #( .write_addr (sfq_write_addr), .acquire_slot (sfq_acquire), .read_addr (sfq_read_addr), - .write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}), - .read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}), + .write_data ({snp_req_addr, snp_req_inv, snp_req_tag}), + .read_data ({snp_rsp_addr, snp_rsp_inv, snp_rsp_tag}), .release_addr (sfq_read_addr), .release_slot (sfq_release), .full (sfq_full) @@ -80,14 +80,14 @@ module VX_snp_forwarder #( wire fwdout_valid; wire [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag; wire [DST_ADDR_WIDTH-1:0] fwdout_addr; - wire fwdout_invalidate; + wire fwdout_inv; wire fwdout_ready; wire dispatch_hold; if (ADDR_DIFF != 0) begin reg [`LOG2UP(SNRQ_SIZE)-1:0] fwdout_tag_r; reg [DST_ADDR_WIDTH-1:0] fwdout_addr_r; - reg fwdout_invalidate_r; + reg fwdout_inv_r; reg dispatch_hold_r; always @(posedge clk) begin @@ -110,21 +110,21 @@ module VX_snp_forwarder #( end if (snp_req_valid && snp_req_ready) begin - fwdout_invalidate_r <= snp_req_invalidate; - fwdout_tag_r <= sfq_write_addr; + fwdout_inv_r <= snp_req_inv; + fwdout_tag_r <= sfq_write_addr; end end - assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full); - assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr; - assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)}; - assign fwdout_invalidate = dispatch_hold_r ? fwdout_invalidate_r : snp_req_invalidate; - assign dispatch_hold = dispatch_hold_r; + assign fwdout_valid = dispatch_hold_r || (snp_req_valid && !sfq_full); + assign fwdout_tag = dispatch_hold_r ? fwdout_tag_r : sfq_write_addr; + assign fwdout_addr = dispatch_hold_r ? fwdout_addr_r : {snp_req_addr, ADDR_DIFF'(0)}; + assign fwdout_inv = dispatch_hold_r ? fwdout_inv_r : snp_req_inv; + assign dispatch_hold= dispatch_hold_r; end else begin - assign fwdout_valid = snp_req_valid && !sfq_full; - assign fwdout_tag = sfq_write_addr; - assign fwdout_addr = snp_req_addr; - assign fwdout_invalidate = snp_req_invalidate; - assign dispatch_hold = 1'b0; + assign fwdout_valid = snp_req_valid && !sfq_full; + assign fwdout_tag = sfq_write_addr; + assign fwdout_addr = snp_req_addr; + assign fwdout_inv = snp_req_inv; + assign dispatch_hold= 1'b0; end always @(posedge clk) begin @@ -139,10 +139,10 @@ module VX_snp_forwarder #( reg [NUM_REQUESTS-1:0] snp_fwdout_ready_other; for (genvar i = 0; i < NUM_REQUESTS; i++) begin - assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; - assign snp_fwdout_addr[i] = fwdout_addr; - assign snp_fwdout_invalidate[i] = fwdout_invalidate; - assign snp_fwdout_tag[i] = fwdout_tag; + assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; + assign snp_fwdout_addr[i] = fwdout_addr; + assign snp_fwdout_inv[i] = fwdout_inv; + assign snp_fwdout_tag[i] = fwdout_tag; end always @(*) begin @@ -201,16 +201,16 @@ module VX_snp_forwarder #( `ifdef DBG_PRINT_CACHE_SNP always @(posedge clk) begin if (snp_req_valid && snp_req_ready) begin - $display("%t: cache%0d snp-fwd-req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag); + $display("%t: cache%0d snp-fwd-req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_req_addr), snp_req_inv, snp_req_tag); end if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin - $display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]); + $display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_fwdout_addr[0]), snp_fwdout_inv[0], snp_fwdout_tag[0]); end if (fwdin_valid && fwdin_ready) begin $display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag); end if (snp_rsp_valid && snp_rsp_ready) begin - $display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag); + $display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_inv, snp_rsp_tag); end end `endif diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index ebc4e77c..b5b5e976 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -16,8 +16,8 @@ module VX_tag_access #( // Enable cache writeable parameter WRITE_ENABLE = 0, - // Enable dram update - parameter DRAM_ENABLE = 0, + // Enable cache flush + parameter FLUSH_ENABLE = 1, // size of tag id in core request tag parameter CORE_TAG_ID_BITS = 0 @@ -42,7 +42,7 @@ module VX_tag_access #( input wire is_write_in, input wire is_fill_in, input wire is_snp_in, - input wire snp_invalidate_in, + input wire snp_inv_in, input wire force_miss_in, // Outputs @@ -52,20 +52,16 @@ module VX_tag_access #( output wire writeen_out ); - wire qual_read_valid; - wire qual_read_dirty; - wire[`TAG_SELECT_BITS-1:0] qual_read_tag; + wire read_valid; + wire read_dirty; + wire[`TAG_SELECT_BITS-1:0] read_tag; - wire use_read_valid; - wire use_read_dirty; - wire[`TAG_SELECT_BITS-1:0] use_read_tag; - - wire use_do_fill; - wire use_do_write; - wire use_invalidate; + wire do_fill; + wire do_write; + wire do_invalidate; - wire[`TAG_SELECT_BITS-1:0] addrtag = addr_in[`TAG_LINE_ADDR_RNG]; - wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; + wire [`TAG_SELECT_BITS-1:0] addrtag = addr_in [`TAG_LINE_ADDR_RNG]; + wire [`LINE_SELECT_BITS-1:0] addrline = addr_in [`LINE_SELECT_BITS-1:0]; VX_tag_store #( .CACHE_SIZE (CACHE_SIZE), @@ -77,68 +73,69 @@ module VX_tag_access #( .reset (reset), .read_addr (addrline), - .read_valid (qual_read_valid), - .read_dirty (qual_read_dirty), - .read_tag (qual_read_tag), + .read_valid (read_valid), + .read_dirty (read_dirty), + .read_tag (read_tag), - .do_fill (use_do_fill), - .do_write (use_do_write), - .invalidate (use_invalidate), + .do_fill (do_fill), + .do_write (do_write), + .invalidate (do_invalidate), .write_addr (addrline), .write_tag (addrtag) ); - assign use_read_valid = qual_read_valid || !DRAM_ENABLE; // If shared memory, always valid - assign use_read_dirty = qual_read_dirty && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache - assign use_read_tag = DRAM_ENABLE ? qual_read_tag : addrtag; // Tag is always the same in SM - // use "case equality" to handle uninitialized tag when block entry is not valid - wire tags_match = use_read_valid && (addrtag === use_read_tag); + wire tags_match = read_valid && (addrtag === read_tag); - assign use_do_write = valid_in - && tags_match - && !is_snp_in - && !is_fill_in - && is_write_in - && !force_miss_in - && !stall; + assign do_write = WRITE_ENABLE + && valid_in + && tags_match + && !is_snp_in + && !is_fill_in + && is_write_in + && !force_miss_in + && !stall; - assign use_do_fill = valid_in - && is_fill_in - && !stall; + assign do_fill = valid_in + && is_fill_in + && !stall; - assign use_invalidate = valid_in - && tags_match - && is_snp_in - && (use_read_dirty || snp_invalidate_in) - && !force_miss_in - && !stall; + assign do_invalidate = FLUSH_ENABLE + && valid_in + && tags_match + && is_snp_in + && (read_dirty || snp_inv_in) + && !force_miss_in + && !stall; assign miss_out = valid_in && !tags_match && !is_snp_in && !is_fill_in; - assign dirty_out = valid_in && use_read_valid && use_read_dirty + assign dirty_out = WRITE_ENABLE + && valid_in + && read_valid + && read_dirty && !(is_fill_in && tags_match); // discard writeback for redundant fills - assign readtag_out = use_read_tag; + assign readtag_out = read_tag; - assign writeen_out = use_do_write || (use_do_fill - && !tags_match); // discard data update for redundant fills + assign writeen_out = do_write || (do_fill + && !tags_match); // discard data update for redundant fills `ifdef DBG_PRINT_CACHE_TAG always @(posedge clk) begin if (valid_in && !stall) begin - if (use_do_fill) begin - $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag, qual_read_tag); + if (do_fill) begin + $display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag, read_tag); if (tags_match) begin $display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID)); end end else if (tags_match) begin - $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, use_read_dirty, addrline, addrtag); + $display("%t: cache%0d:%0d tag-hit: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag); end else begin - $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, use_read_dirty, addrline, addrtag, qual_read_tag); + $display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, read_dirty, addrline, addrtag, read_tag); end end end diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index fa685bc1..26a24154 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -118,7 +118,7 @@ "dram_rsp_tag":"`VX_DRAM_TAG_WIDTH", "?snp_req_fire": 1, "snp_req_addr": 32, - "snp_req_invalidate": 1, + "snp_req_inv": 1, "snp_req_tag":"`VX_SNP_TAG_WIDTH", "?snp_rsp_fire": 1, "snp_rsp_tag":"`VX_SNP_TAG_WIDTH", @@ -208,7 +208,7 @@ "addr_st1": 32, "addr_st2": 32, "addr_st3": 32, - "is_mshr_st1": 1, + "is_mshr_st0": 1, "miss_st1": 1, "dirty_st1": 1, "!force_miss_st1": 1,