From 31ffbe0d6ad8424ce7a196ce138bc07de6a5a32e Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 1 Sep 2020 03:39:03 -0700 Subject: [PATCH] clean up 'stage_1_cycles' from cache --- hw/rtl/VX_cluster.v | 1 - hw/rtl/VX_config.vh | 25 ---- hw/rtl/VX_mem_unit.v | 3 - hw/rtl/Vortex.v | 1 - hw/rtl/cache/VX_bank.v | 214 ++++++++++++------------------ hw/rtl/cache/VX_cache.v | 3 - hw/rtl/cache/VX_tag_data_access.v | 191 +++++++++++--------------- 7 files changed, 167 insertions(+), 271 deletions(-) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 9a9c4a8f..f1cba93d 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -365,7 +365,6 @@ module VX_cluster #( .NUM_BANKS (`L2NUM_BANKS), .WORD_SIZE (`L2WORD_SIZE), .NUM_REQUESTS (`L2NUM_REQUESTS), - .STAGE_1_CYCLES (`L2STAGE_1_CYCLES), .CREQ_SIZE (`L2CREQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE), .DFPQ_SIZE (`L2DFPQ_SIZE), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 358ec37d..0cd84da3 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -193,11 +193,6 @@ `define DWORD_SIZE 4 `endif -// Number of cycles to complete stage 1 (read from memory) -`ifndef DSTAGE_1_CYCLES -`define DSTAGE_1_CYCLES 1 -`endif - // Core Request Queue Size `ifndef DCREQ_SIZE `define DCREQ_SIZE `NUM_WARPS @@ -264,11 +259,6 @@ `define IWORD_SIZE 4 `endif -// Number of cycles to complete stage 1 (read from memory) -`ifndef ISTAGE_1_CYCLES -`define ISTAGE_1_CYCLES 1 -`endif - // Core Request Queue Size `ifndef ICREQ_SIZE `define ICREQ_SIZE `NUM_WARPS @@ -330,11 +320,6 @@ `define SWORD_SIZE 4 `endif -// Number of cycles to complete stage 1 (read from memory) -`ifndef SSTAGE_1_CYCLES -`define SSTAGE_1_CYCLES 1 -`endif - // Core Request Queue Size `ifndef SCREQ_SIZE `define SCREQ_SIZE `NUM_WARPS @@ -367,11 +352,6 @@ `define L2WORD_SIZE `L2BANK_LINE_SIZE `endif -// Number of cycles to complete stage 1 (read from memory) -`ifndef L2STAGE_1_CYCLES -`define L2STAGE_1_CYCLES 1 -`endif - // Core Request Queue Size `ifndef L2CREQ_SIZE `define L2CREQ_SIZE 8 @@ -438,11 +418,6 @@ `define L3WORD_SIZE `L3BANK_LINE_SIZE `endif -// Number of cycles to complete stage 1 (read from memory) -`ifndef L3STAGE_1_CYCLES -`define L3STAGE_1_CYCLES 1 -`endif - // Core Request Queue Size `ifndef L3CREQ_SIZE `define L3CREQ_SIZE 8 diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index f67a9685..fbcee41f 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -63,7 +63,6 @@ module VX_mem_unit # ( .NUM_BANKS (`SNUM_BANKS), .WORD_SIZE (`SWORD_SIZE), .NUM_REQUESTS (`SNUM_REQUESTS), - .STAGE_1_CYCLES (`SSTAGE_1_CYCLES), .CREQ_SIZE (`SCREQ_SIZE), .MRVQ_SIZE (8), .DFPQ_SIZE (1), @@ -147,7 +146,6 @@ module VX_mem_unit # ( .NUM_BANKS (`DNUM_BANKS), .WORD_SIZE (`DWORD_SIZE), .NUM_REQUESTS (`DNUM_REQUESTS), - .STAGE_1_CYCLES (`DSTAGE_1_CYCLES), .CREQ_SIZE (`DCREQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE), .DFPQ_SIZE (`DDFPQ_SIZE), @@ -232,7 +230,6 @@ module VX_mem_unit # ( .NUM_BANKS (`INUM_BANKS), .WORD_SIZE (`IWORD_SIZE), .NUM_REQUESTS (`INUM_REQUESTS), - .STAGE_1_CYCLES (`ISTAGE_1_CYCLES), .CREQ_SIZE (`ICREQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE), .DFPQ_SIZE (`IDFPQ_SIZE), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 8122f184..8e11428d 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -393,7 +393,6 @@ module Vortex ( .NUM_BANKS (`L3NUM_BANKS), .WORD_SIZE (`L3WORD_SIZE), .NUM_REQUESTS (`L3NUM_REQUESTS), - .STAGE_1_CYCLES (`L3STAGE_1_CYCLES), .CREQ_SIZE (`L3CREQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE), .DFPQ_SIZE (`L3DFPQ_SIZE), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index f00654bc..0f1fb8b8 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -13,8 +13,6 @@ module VX_bank #( parameter WORD_SIZE = 0, // Number of Word requests per cycle {1, 2, 4, 8, ...} parameter NUM_REQUESTS = 0, - // Number of cycles to complete i 1 (read from memory) - parameter STAGE_1_CYCLES = 0, // Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Core Request Queue Size @@ -113,13 +111,13 @@ module VX_bank #( wire[`REQS_BITS-1:0] debug_tid_st0; wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0; - wire[31:0] debug_pc_st1e; - wire[`NR_BITS-1:0] debug_rd_st1e; - wire[`NW_BITS-1:0] debug_wid_st1e; - wire debug_rw_st1e; - wire[WORD_SIZE-1:0] debug_byteen_st1e; - wire[`REQS_BITS-1:0] debug_tid_st1e; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e; + wire[31:0] debug_pc_st1; + wire[`NR_BITS-1:0] debug_rd_st1; + wire[`NW_BITS-1:0] debug_wid_st1; + wire debug_rw_st1; + wire[WORD_SIZE-1:0] debug_byteen_st1; + wire[`REQS_BITS-1:0] debug_tid_st1; + wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1; wire[31:0] debug_pc_st2; wire[`NR_BITS-1:0] debug_rd_st2; @@ -241,9 +239,9 @@ module VX_bank #( wire mrvq_is_snp_st0; wire mrvq_snp_invalidate_st0; - wire mrvq_pending_hazard_st1e; - wire st2_pending_hazard_st1e; - wire force_request_miss_st1e; + wire mrvq_pending_hazard_st1; + wire st2_pending_hazard_st1; + wire force_request_miss_st1; wire[`REQS_BITS-1:0] miss_add_tid; wire[`REQ_TAG_WIDTH-1:0] miss_add_tag; @@ -260,26 +258,15 @@ module VX_bank #( wire dwbq_push_stall; wire dram_fill_req_stall; wire stall_bank_pipe; - - reg is_fill_in_pipe; - wire is_fill_st1 [STAGE_1_CYCLES-1:0]; + wire is_fill_st1; `DEBUG_BEGIN - wire going_to_write_st1 [STAGE_1_CYCLES-1:0]; + wire going_to_write_st1; `DEBUG_END - - always @(*) begin - is_fill_in_pipe = 0; - for (integer j = 0; j < STAGE_1_CYCLES; j++) begin - if (is_fill_st1[j]) begin - is_fill_in_pipe = 1; - end - end - end wire mrvq_pop_unqual = mrvq_valid_st0; wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty; - wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1[0] && !is_fill_in_pipe; + wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1; wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty && !reqq_req_st0; // if there's any reqq_req, don't schedule snrq. assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2; @@ -300,15 +287,15 @@ module VX_bank #( wire qual_is_snp_st0; wire qual_snp_invalidate_st0; - wire valid_st1 [STAGE_1_CYCLES-1:0]; - wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0]; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0]; - wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0]; - wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; - wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0]; - wire is_snp_st1 [STAGE_1_CYCLES-1:0]; - wire snp_invalidate_st1 [STAGE_1_CYCLES-1:0]; - wire is_mrvq_st1 [STAGE_1_CYCLES-1:0]; + wire valid_st1; + wire [`LINE_ADDR_WIDTH-1:0] addr_st1; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; + wire [`WORD_WIDTH-1:0] writeword_st1; + wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1; + wire [`BANK_LINE_WIDTH-1:0] writedata_st1; + wire is_snp_st1; + wire snp_invalidate_st1; + wire is_mrvq_st1; assign qual_is_fill_st0 = dfpq_pop_unqual; @@ -362,69 +349,45 @@ module VX_bank #( VX_generic_register #( .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) - ) s0_1_c0 ( + ) pipe_reg0 ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), .flush (0), .in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}), - .out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]}) + .out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1}) ); - for (genvar i = 1; i < STAGE_1_CYCLES; i++) begin - VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH) - ) s0_1_cc ( - .clk (clk), - .reset (reset), - .stall (stall_bank_pipe), - .flush (0), - .in ({is_mrvq_st1[i-1], is_snp_st1[i-1], snp_invalidate_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}), - .out ({is_mrvq_st1[i] , is_snp_st1[i], snp_invalidate_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]}) - ); - end - - wire[`WORD_WIDTH-1:0] readword_st1e; - wire[`BANK_LINE_WIDTH-1:0] readdata_st1e; - wire[`TAG_SELECT_BITS-1:0] readtag_st1e; - wire miss_st1e; - wire dirty_st1e; - wire[BANK_LINE_SIZE-1:0] dirtyb_st1e; + wire[`WORD_WIDTH-1:0] readword_st1; + wire[`BANK_LINE_WIDTH-1:0] readdata_st1; + wire[`TAG_SELECT_BITS-1:0] readtag_st1; + wire miss_st1; + wire dirty_st1; + wire[BANK_LINE_SIZE-1:0] dirtyb_st1; `DEBUG_BEGIN - wire [`REQ_TAG_WIDTH-1:0] tag_st1e; - wire [`REQS_BITS-1:0] tid_st1e; + wire [`REQ_TAG_WIDTH-1:0] tag_st1; + wire [`REQS_BITS-1:0] tid_st1; `DEBUG_END - wire mem_rw_st1e; - wire [WORD_SIZE-1:0] mem_byteen_st1e; - wire fill_saw_dirty_st1e; - wire is_snp_st1e; - wire snp_invalidate_st1e; - wire snp_to_mrvq_st1e; - wire mrvq_init_ready_state_st1e; + wire mem_rw_st1; + wire [WORD_SIZE-1:0] mem_byteen_st1; + wire fill_saw_dirty_st1; + wire snp_to_mrvq_st1; + wire mrvq_init_ready_state_st1; wire miss_add_because_miss; - wire valid_st1e; - wire is_mrvq_st1e; - wire mrvq_recover_ready_state_st1e; - wire[`LINE_ADDR_WIDTH-1:0] addr_st1e; + wire mrvq_recover_ready_state_st1; - assign is_mrvq_st1e = is_mrvq_st1[STAGE_1_CYCLES-1]; - assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; - assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; - assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1]; - assign addr_st1e = addr_st1[STAGE_1_CYCLES-1]; + assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; - assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; + assign st2_pending_hazard_st1 = (miss_add_because_miss) + && ((addr_st2 == addr_st1) && !is_fill_st2); - assign st2_pending_hazard_st1e = (miss_add_because_miss) - && ((addr_st2 == addr_st1e) && !is_fill_st2); + assign force_request_miss_st1 = (valid_st1 && !is_mrvq_st1 && (mrvq_pending_hazard_st1 || st2_pending_hazard_st1)) + || (valid_st1 && is_mrvq_st1 && recover_mrvq_state_st2); - assign force_request_miss_st1e = (valid_st1e && !is_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) - || (valid_st1e && is_mrvq_st1e && recover_mrvq_state_st2); - - assign mrvq_recover_ready_state_st1e = valid_st1e - && is_mrvq_st1e + assign mrvq_recover_ready_state_st1 = valid_st1 + && is_mrvq_st1 && recover_mrvq_state_st2 - && (addr_st2 == addr_st1e); + && (addr_st2 == addr_st1); VX_tag_data_access #( .BANK_ID (BANK_ID), @@ -434,7 +397,6 @@ module VX_bank #( .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), - .STAGE_1_CYCLES (STAGE_1_CYCLES), .DRAM_ENABLE (DRAM_ENABLE), .WRITE_ENABLE (WRITE_ENABLE) ) tag_data_access ( @@ -442,54 +404,54 @@ module VX_bank #( .reset (reset), `ifdef DBG_CORE_REQ_INFO - .debug_pc_st1e(debug_pc_st1e), - .debug_rd_st1e(debug_rd_st1e), - .debug_wid_st1e(debug_wid_st1e), - .debug_tagid_st1e(debug_tagid_st1e), + .debug_pc_st1 (debug_pc_st1), + .debug_rd_st1 (debug_rd_st1), + .debug_wid_st1 (debug_wid_st1), + .debug_tagid_st1(debug_tagid_st1), `endif .stall (stall_bank_pipe), .stall_bank_pipe(stall_bank_pipe), - .force_request_miss_st1e(force_request_miss_st1e), + .force_request_miss_st1(force_request_miss_st1), // Initial Read - .readaddr_st10(addr_st1[0][`LINE_SELECT_BITS-1:0]), + .readaddr_st1(addr_st1[`LINE_SELECT_BITS-1:0]), // Actual Read/Write - .valid_req_st1e (valid_st1e), - .writefill_st1e (is_fill_st1[STAGE_1_CYCLES-1]), - .writeaddr_st1e (addr_st1e), - .wordsel_st1e (wsel_st1[STAGE_1_CYCLES-1]), - .writeword_st1e (writeword_st1[STAGE_1_CYCLES-1]), - .writedata_st1e (writedata_st1[STAGE_1_CYCLES-1]), + .valid_req_st1 (valid_st1), + .writefill_st1 (is_fill_st1), + .writeaddr_st1 (addr_st1), + .wordsel_st1 (wsel_st1), + .writeword_st1 (writeword_st1), + .writedata_st1 (writedata_st1), - .mem_rw_st1e (mem_rw_st1e), - .mem_byteen_st1e (mem_byteen_st1e), + .mem_rw_st1 (mem_rw_st1), + .mem_byteen_st1 (mem_byteen_st1), - .is_snp_st1e (is_snp_st1e), - .snp_invalidate_st1e (snp_invalidate_st1e), + .is_snp_st1 (is_snp_st1), + .snp_invalidate_st1(snp_invalidate_st1), // Read Data - .readword_st1e (readword_st1e), - .readdata_st1e (readdata_st1e), - .readtag_st1e (readtag_st1e), - .miss_st1e (miss_st1e), - .dirty_st1e (dirty_st1e), - .dirtyb_st1e (dirtyb_st1e), - .fill_saw_dirty_st1e (fill_saw_dirty_st1e), - .snp_to_mrvq_st1e (snp_to_mrvq_st1e), - .mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e) + .readword_st1 (readword_st1), + .readdata_st1 (readdata_st1), + .readtag_st1 (readtag_st1), + .miss_st1 (miss_st1), + .dirty_st1 (dirty_st1), + .dirtyb_st1 (dirtyb_st1), + .fill_saw_dirty_st1(fill_saw_dirty_st1), + .snp_to_mrvq_st1(snp_to_mrvq_st1), + .mrvq_init_ready_state_st1(mrvq_init_ready_state_st1) ); `ifdef DBG_CORE_REQ_INFO if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_pc_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; + assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1; end `endif - wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; - wire is_mrvq_st1e_st2 = is_mrvq_st1e; + wire qual_valid_st1_2 = valid_st1 && !is_fill_st1; + wire is_mrvq_st1_st2 = is_mrvq_st1; wire valid_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; @@ -510,16 +472,16 @@ module VX_bank #( wire mrvq_recover_ready_state_st2; wire mrvq_init_ready_state_unqual_st2; wire mrvq_init_ready_state_hazard_st0_st1; - wire mrvq_init_ready_state_hazard_st1e_st1; + wire mrvq_init_ready_state_hazard_st1_st1; VX_generic_register #( .N(1+ 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH) - ) st_1e_2 ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (stall_bank_pipe), .flush (0), - .in ({mrvq_recover_ready_state_st1e, is_mrvq_st1e_st2, mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, snp_invalidate_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1], qual_valid_st1e_2, addr_st1e, wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, dirtyb_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}), .out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2}) ); @@ -554,11 +516,11 @@ module VX_bank #( wire miss_add_is_mrvq = valid_st2 && is_mrvq_st2 && !stall_bank_pipe; assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == dfpq_addr_st0); // Doesn't need to be muxed to qual, only care about fills - assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1e); + assign mrvq_init_ready_state_hazard_st1_st1 = miss_add_unqual && is_fill_st1 && (miss_add_addr == addr_st1); assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 // When req was in st1e, either matched with an mrvq entery OR mrvq recovering state || mrvq_init_ready_state_hazard_st0_st1 // If there's a fill in st0 that has the same address as miss_add_addr - || mrvq_init_ready_state_hazard_st1e_st1; // If there's a fill in st1 that has the same address as miss_add_addr + || mrvq_init_ready_state_hazard_st1_st1; // If there's a fill in st1 that has the same address as miss_add_addr VX_cache_miss_resrv #( .BANK_ID (BANK_ID), @@ -591,9 +553,9 @@ module VX_bank #( .mrvq_init_ready_state (mrvq_init_ready_state_st2), // Broadcast - .is_fill_st1 (is_fill_st1[STAGE_1_CYCLES-1]), - .fill_addr_st1 (addr_st1e), - .pending_hazard_st1 (mrvq_pending_hazard_st1e), + .is_fill_st1 (is_fill_st1), + .fill_addr_st1 (addr_st1), + .pending_hazard_st1 (mrvq_pending_hazard_st1), // Dequeue .miss_resrv_pop (mrvq_pop), @@ -761,17 +723,17 @@ module VX_bank #( `endif `SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0); -`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1e); +`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1); `SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2); -`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1e); -`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1e); -`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1e); -`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1e); +`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1); +`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1); +`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1); +`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1); `SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe); `SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID)); -`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID)); +`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); `SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); endmodule diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 63e31394..93bbc77e 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -12,8 +12,6 @@ module VX_cache #( parameter WORD_SIZE = 4, // Number of Word requests per cycle {1, 2, 4, 8, ...} parameter NUM_REQUESTS = 4, - // Number of cycles to complete stage 1 (read from memory) - parameter STAGE_1_CYCLES = 1, // Queues feeding into banks Knobs {1, 2, 4, 8, ...} @@ -359,7 +357,6 @@ module VX_cache #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .NUM_REQUESTS (NUM_REQUESTS), - .STAGE_1_CYCLES (STAGE_1_CYCLES), .CREQ_SIZE (CREQ_SIZE), .MRVQ_SIZE (MRVQ_SIZE), .DFPQ_SIZE (DFPQ_SIZE), diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index e636356a..a1b80838 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -13,9 +13,6 @@ module VX_tag_data_access #( // Size of a word in bytes parameter WORD_SIZE = 0, - // Number of cycles to complete stage 1 (read from memory) - parameter STAGE_1_CYCLES = 0, - // Enable cache writeable parameter WRITE_ENABLE = 0, @@ -27,62 +24,57 @@ module VX_tag_data_access #( `ifdef DBG_CORE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc_st1e, - input wire[`NR_BITS-1:0] debug_rd_st1e, - input wire[`NW_BITS-1:0] debug_wid_st1e, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e, + input wire[31:0] debug_pc_st1, + input wire[`NR_BITS-1:0] debug_rd_st1, + input wire[`NW_BITS-1:0] debug_wid_st1, + input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1, `IGNORE_WARNINGS_END `endif input wire stall, - input wire is_snp_st1e, - input wire snp_invalidate_st1e, + input wire is_snp_st1, + input wire snp_invalidate_st1, input wire stall_bank_pipe, - input wire force_request_miss_st1e, + input wire force_request_miss_st1, - input wire[`LINE_SELECT_BITS-1:0] readaddr_st10, - input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1e, + input wire[`LINE_SELECT_BITS-1:0] readaddr_st1, + input wire[`LINE_ADDR_WIDTH-1:0] writeaddr_st1, - input wire valid_req_st1e, - input wire writefill_st1e, - input wire[`WORD_WIDTH-1:0] writeword_st1e, - input wire[`BANK_LINE_WIDTH-1:0] writedata_st1e, + input wire valid_req_st1, + input wire writefill_st1, + input wire[`WORD_WIDTH-1:0] writeword_st1, + input wire[`BANK_LINE_WIDTH-1:0] writedata_st1, `IGNORE_WARNINGS_BEGIN - input wire mem_rw_st1e, - input wire[WORD_SIZE-1:0] mem_byteen_st1e, - input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1e, + input wire mem_rw_st1, + input wire[WORD_SIZE-1:0] mem_byteen_st1, + input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st1, `IGNORE_WARNINGS_END - output wire[`WORD_WIDTH-1:0] readword_st1e, - output wire[`BANK_LINE_WIDTH-1:0] readdata_st1e, - output wire[`TAG_SELECT_BITS-1:0] readtag_st1e, - output wire miss_st1e, - output wire dirty_st1e, - output wire[BANK_LINE_SIZE-1:0] dirtyb_st1e, - output wire fill_saw_dirty_st1e, - output wire snp_to_mrvq_st1e, - output wire mrvq_init_ready_state_st1e + output wire[`WORD_WIDTH-1:0] readword_st1, + output wire[`BANK_LINE_WIDTH-1:0] readdata_st1, + output wire[`TAG_SELECT_BITS-1:0] readtag_st1, + output wire miss_st1, + output wire dirty_st1, + output wire[BANK_LINE_SIZE-1:0] dirtyb_st1, + output wire fill_saw_dirty_st1, + output wire snp_to_mrvq_st1, + output wire mrvq_init_ready_state_st1 ); - - wire read_valid_st1c[STAGE_1_CYCLES-1:0]; - wire read_dirty_st1c[STAGE_1_CYCLES-1:0]; - wire[BANK_LINE_SIZE-1:0] read_dirtyb_st1c[STAGE_1_CYCLES-1:0]; - wire[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0]; - wire[`BANK_LINE_WIDTH-1:0] read_data_st1c [STAGE_1_CYCLES-1:0]; - + `UNUSED_VAR (stall) + wire qual_read_valid_st1; wire qual_read_dirty_st1; wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_st1; wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1; wire[`BANK_LINE_WIDTH-1:0] qual_read_data_st1; - wire use_read_valid_st1e; - wire use_read_dirty_st1e; - wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st1e; - wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1e; - wire[`BANK_LINE_WIDTH-1:0] use_read_data_st1e; + wire use_read_valid_st1; + wire use_read_dirty_st1; + wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st1; + wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1; + wire[`BANK_LINE_WIDTH-1:0] use_read_data_st1; wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_write_enable; wire[`BANK_LINE_WIDTH-1:0] use_write_data; @@ -90,11 +82,11 @@ module VX_tag_data_access #( wire invalidate_line; wire tags_match; - wire real_writefill = valid_req_st1e && writefill_st1e - && ((~use_read_valid_st1e) || (use_read_valid_st1e && ~tags_match)); + wire real_writefill = valid_req_st1 && writefill_st1 + && ((~use_read_valid_st1) || (use_read_valid_st1 && ~tags_match)); - wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG]; - wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0]; + wire[`TAG_SELECT_BITS-1:0] writetag_st1 = writeaddr_st1[`TAG_LINE_ADDR_RNG]; + wire[`LINE_SELECT_BITS-1:0] writeladdr_st1 = writeaddr_st1[`LINE_SELECT_BITS-1:0]; VX_tag_data_store #( .CACHE_SIZE (CACHE_SIZE), @@ -106,7 +98,7 @@ module VX_tag_data_access #( .reset (reset), .stall_bank_pipe(stall_bank_pipe), - .read_addr (readaddr_st10), + .read_addr (readaddr_st1), .read_valid (qual_read_valid_st1), .read_dirty (qual_read_dirty_st1), .read_dirtyb (qual_read_dirtyb_st1), @@ -116,115 +108,90 @@ module VX_tag_data_access #( .invalidate (invalidate_line), .write_enable(use_write_enable), .write_fill (real_writefill), - .write_addr (writeladdr_st1e), - .tag_index (writetag_st1e), + .write_addr (writeladdr_st1), + .tag_index (writetag_st1), .write_data (use_write_data), .fill_sent (fill_sent) ); - VX_generic_register #( - .N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH), - .PASSTHRU(1) - ) s0_1_c0 ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (0), - .in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_dirtyb_st1, qual_read_tag_st1, qual_read_data_st1}), - .out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]}) - ); - - for (genvar i = 1; i < STAGE_1_CYCLES-1; i++) begin - VX_generic_register #( - .N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH) - ) s0_1_cc ( - .clk (clk), - .reset (reset), - .stall (stall), - .flush (0), - .in ({read_valid_st1c[i-1], read_dirty_st1c[i-1], read_dirtyb_st1c[i-1], read_tag_st1c[i-1], read_data_st1c[i-1]}), - .out ({read_valid_st1c[i], read_dirty_st1c[i], read_dirtyb_st1c[i], read_tag_st1c[i], read_data_st1c[i]}) - ); - end - - assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || !DRAM_ENABLE; // If shared memory, always valid - assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache - assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM - assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1]; - assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1]; + assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid + assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache + assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : writetag_st1; // Tag is always the same in SM + assign use_read_dirtyb_st1= qual_read_dirtyb_st1; + assign use_read_data_st1 = qual_read_data_st1; if (`WORD_SELECT_WIDTH != 0) begin - wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] readword = use_read_data_st1[wordsel_st1 * `WORD_WIDTH +: `WORD_WIDTH]; for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}}; + assign readword_st1[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1[i]}}; end end else begin for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}}; + assign readword_st1[i * 8 +: 8] = use_read_data_st1[i * 8 +: 8] & {8{mem_byteen_st1[i]}}; end end wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] we; wire [`BANK_LINE_WIDTH-1:0] data_write; - wire should_write = mem_rw_st1e - && valid_req_st1e - && use_read_valid_st1e - && ~miss_st1e - && ~is_snp_st1e + wire should_write = mem_rw_st1 + && valid_req_st1 + && use_read_valid_st1 + && ~miss_st1 + && ~is_snp_st1 && ~real_writefill; for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i))) + wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1 == `UP(`WORD_SELECT_WIDTH)'(i))) && should_write; assign we[i] = real_writefill ? {WORD_SIZE{1'b1}} : - normal_write ? mem_byteen_st1e : + normal_write ? mem_byteen_st1 : {WORD_SIZE{1'b0}}; - assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1e; + assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = real_writefill ? writedata_st1[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st1; end - assign use_write_enable = (writefill_st1e && ~real_writefill) ? 0 : we; + assign use_write_enable = (writefill_st1 && ~real_writefill) ? 0 : we; assign use_write_data = data_write; // use "case equality" to handle uninitialized tag when block entry is not valid - assign tags_match = (writetag_st1e === use_read_tag_st1e); + assign tags_match = (writetag_st1 === use_read_tag_st1); - wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && ~force_request_miss_st1e; - wire req_invalid = valid_req_st1e && ~is_snp_st1e && ~use_read_valid_st1e && ~writefill_st1e; - wire req_miss = valid_req_st1e && ~is_snp_st1e && use_read_valid_st1e && ~writefill_st1e && ~tags_match; + wire snoop_hit_no_pending = valid_req_st1 && is_snp_st1 && use_read_valid_st1 && tags_match && (use_read_dirty_st1 || snp_invalidate_st1) && ~force_request_miss_st1; + wire req_invalid = valid_req_st1 && ~is_snp_st1 && ~use_read_valid_st1 && ~writefill_st1; + wire req_miss = valid_req_st1 && ~is_snp_st1 && use_read_valid_st1 && ~writefill_st1 && ~tags_match; wire real_miss = req_invalid || req_miss; - wire force_core_miss = (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e && ~real_miss); - assign snp_to_mrvq_st1e = valid_req_st1e && is_snp_st1e && force_request_miss_st1e; + wire force_core_miss = (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1 && ~real_miss); + assign snp_to_mrvq_st1 = valid_req_st1 && is_snp_st1 && force_request_miss_st1; // The second term is basically saying always make an entry ready if there's already antoher entry waiting, even if you yourself see a miss - assign mrvq_init_ready_state_st1e = snp_to_mrvq_st1e - || (force_request_miss_st1e && ~is_snp_st1e && ~writefill_st1e && valid_req_st1e); + assign mrvq_init_ready_state_st1 = snp_to_mrvq_st1 + || (force_request_miss_st1 && ~is_snp_st1 && ~writefill_st1 && valid_req_st1); - assign miss_st1e = real_miss || snoop_hit_no_pending || force_core_miss; - assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e; - assign dirtyb_st1e = use_read_dirtyb_st1e; - assign readdata_st1e = use_read_data_st1e; - assign readtag_st1e = use_read_tag_st1e; - assign fill_sent = miss_st1e; - assign fill_saw_dirty_st1e = real_writefill && dirty_st1e; - assign invalidate_line = snoop_hit_no_pending; + assign miss_st1 = real_miss || snoop_hit_no_pending || force_core_miss; + assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1; + assign dirtyb_st1 = use_read_dirtyb_st1; + assign readdata_st1 = use_read_data_st1; + assign readtag_st1 = use_read_tag_st1; + assign fill_sent = miss_st1; + assign fill_saw_dirty_st1 = real_writefill && dirty_st1; + assign invalidate_line = snoop_hit_no_pending; `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin - if (valid_req_st1e) begin + if (valid_req_st1) begin if ((| use_write_enable)) begin - if (writefill_st1e) begin - $display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data); + if (writefill_st1) begin + $display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data); end else begin - $display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e); + $display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1); end end else - if (miss_st1e) begin - $display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e); + if (miss_st1) begin + $display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1); end else begin - $display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1); + $display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1); end end end