From f68af3bb843f3709ab00c2637abe7bc8e84eebe6 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 1 Dec 2020 00:54:25 -0800 Subject: [PATCH] using mshr pending request size --- hw/rtl/VX_cluster.v | 12 +- hw/rtl/VX_config.vh | 4 +- hw/rtl/VX_csr_io_arb.v | 10 +- hw/rtl/Vortex.v | 12 +- hw/rtl/cache/VX_bank.v | 228 ++++++++++-------- hw/rtl/cache/VX_cache_core_req_bank_sel.v | 2 +- ...{VX_cache_miss_resrv.v => VX_miss_resrv.v} | 10 +- hw/rtl/cache/VX_tag_access.v | 2 +- hw/syn/quartus/project.tcl | 1 - 9 files changed, 147 insertions(+), 134 deletions(-) rename hw/rtl/cache/{VX_cache_miss_resrv.v => VX_miss_resrv.v} (96%) diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 34c3df00..80e32d2f 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -256,17 +256,17 @@ module VX_cluster #( .csr_io_req_data_in (csr_io_req_data), .csr_io_req_ready_in (csr_io_req_ready), - // input responses - .csr_io_rsp_valid_in (per_core_csr_io_rsp_valid), - .csr_io_rsp_data_in (per_core_csr_io_rsp_data), - .csr_io_rsp_ready_in (per_core_csr_io_rsp_ready), - // output request .csr_io_req_valid_out (per_core_csr_io_req_valid), .csr_io_req_addr_out (per_core_csr_io_req_addr), .csr_io_req_rw_out (per_core_csr_io_req_rw), .csr_io_req_data_out (per_core_csr_io_req_data), - .csr_io_req_ready_out (per_core_csr_io_req_ready), + .csr_io_req_ready_out (per_core_csr_io_req_ready), + + // input responses + .csr_io_rsp_valid_in (per_core_csr_io_rsp_valid), + .csr_io_rsp_data_in (per_core_csr_io_rsp_data), + .csr_io_rsp_ready_in (per_core_csr_io_rsp_ready), // output response .csr_io_rsp_valid_out (csr_io_rsp_valid), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 98a3cc5d..e7eaa03e 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -209,7 +209,7 @@ // Miss Handling Register Size `ifndef DMSHR_SIZE -`define DMSHR_SIZE `MAX(`LSUQ_SIZE, 4) +`define DMSHR_SIZE `LSUQ_SIZE `endif // DRAM Request Queue Size @@ -251,7 +251,7 @@ // Miss Handling Register Size `ifndef 
IMSHR_SIZE -`define IMSHR_SIZE `MAX(`NUM_WARPS, 4) +`define IMSHR_SIZE `NUM_WARPS `endif // DRAM Request Queue Size diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index f0d88ba2..b45250e9 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -16,11 +16,6 @@ module VX_csr_io_arb #( input wire [31:0] csr_io_req_data_in, output wire csr_io_req_ready_in, - // input response - input wire [NUM_REQUESTS-1:0] csr_io_rsp_valid_in, - input wire [NUM_REQUESTS-1:0][31:0] csr_io_rsp_data_in, - output wire [NUM_REQUESTS-1:0] csr_io_rsp_ready_in, - // output request output wire [NUM_REQUESTS-1:0] csr_io_req_valid_out, output wire [NUM_REQUESTS-1:0][11:0] csr_io_req_addr_out, @@ -28,6 +23,11 @@ module VX_csr_io_arb #( output wire [NUM_REQUESTS-1:0][31:0] csr_io_req_data_out, input wire [NUM_REQUESTS-1:0] csr_io_req_ready_out, + // input response + input wire [NUM_REQUESTS-1:0] csr_io_rsp_valid_in, + input wire [NUM_REQUESTS-1:0][31:0] csr_io_rsp_data_in, + output wire [NUM_REQUESTS-1:0] csr_io_rsp_ready_in, + // output response output wire csr_io_rsp_valid_out, output wire [31:0] csr_io_rsp_data_out, diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index a6bed5d4..0e7fb5e2 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -297,17 +297,17 @@ module Vortex ( .csr_io_req_data_in (csr_io_req_data), .csr_io_req_ready_in (csr_io_req_ready), - // input responses - .csr_io_rsp_valid_in (per_cluster_csr_io_rsp_valid), - .csr_io_rsp_data_in (per_cluster_csr_io_rsp_data), - .csr_io_rsp_ready_in (per_cluster_csr_io_rsp_ready), - // output request .csr_io_req_valid_out (per_cluster_csr_io_req_valid), .csr_io_req_addr_out (per_cluster_csr_io_req_addr), .csr_io_req_rw_out (per_cluster_csr_io_req_rw), .csr_io_req_data_out (per_cluster_csr_io_req_data), - .csr_io_req_ready_out (per_cluster_csr_io_req_ready), + .csr_io_req_ready_out (per_cluster_csr_io_req_ready), + + // input responses + .csr_io_rsp_valid_in (per_cluster_csr_io_rsp_valid), + .csr_io_rsp_data_in 
(per_cluster_csr_io_rsp_data), + .csr_io_rsp_ready_in (per_cluster_csr_io_rsp_ready), // output response .csr_io_rsp_valid_out (csr_io_rsp_valid), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index e2b24b41..9e23765f 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -214,20 +214,20 @@ module VX_bank #( assign dram_rsp_ready = 0; end - wire reqq_pop; - wire reqq_empty; - wire reqq_full; - wire [`REQS_BITS-1:0] reqq_tid_st0; - wire reqq_rw_st0; - wire [WORD_SIZE-1:0] reqq_byteen_st0; + wire creq_pop; + wire creq_empty; + wire creq_full; + wire [`REQS_BITS-1:0] creq_tid_st0; + wire creq_rw_st0; + wire [WORD_SIZE-1:0] creq_byteen_st0; `IGNORE_WARNINGS_BEGIN - wire [`WORD_ADDR_WIDTH-1:0] reqq_addr_st0; + wire [`WORD_ADDR_WIDTH-1:0] creq_addr_st0; `IGNORE_WARNINGS_END - wire [`WORD_WIDTH-1:0] reqq_writeword_st0; - wire [CORE_TAG_WIDTH-1:0] reqq_tag_st0; + wire [`WORD_WIDTH-1:0] creq_writeword_st0; + wire [CORE_TAG_WIDTH-1:0] creq_tag_st0; wire core_req_fire = (| core_req_valid) && core_req_ready; - assign core_req_ready = !reqq_full; + assign core_req_ready = !creq_full; VX_bank_core_req_arb #( .WORD_SIZE (WORD_SIZE), @@ -249,52 +249,31 @@ module VX_bank #( .writedata_in (core_req_data), // Dequeue - .pop (reqq_pop), - .tag_out (reqq_tag_st0), - .tid_out (reqq_tid_st0), - .rw_out (reqq_rw_st0), - .byteen_out (reqq_byteen_st0), - .addr_out (reqq_addr_st0), - .writedata_out (reqq_writeword_st0), + .pop (creq_pop), + .tag_out (creq_tag_st0), + .tid_out (creq_tid_st0), + .rw_out (creq_rw_st0), + .byteen_out (creq_byteen_st0), + .addr_out (creq_addr_st0), + .writedata_out (creq_writeword_st0), // States - .empty (reqq_empty), - .full (reqq_full) - ); + .empty (creq_empty), + .full (creq_full) + ); - wire mshr_pop; - wire mshr_full; - wire mshr_almfull; - wire mshr_valid_st0; - wire[`REQS_BITS-1:0] mshr_tid_st0; - wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] mshr_wsel_st0; - wire [`WORD_WIDTH-1:0] 
mshr_writeword_st0; - wire [`REQ_TAG_WIDTH-1:0] mshr_tag_st0; - wire mshr_rw_st0; - wire [WORD_SIZE-1:0] mshr_byteen_st0; - wire mshr_is_snp_st0; - wire mshr_snp_inv_st0; - wire is_mshr_miss_st2; - wire is_mshr_miss_st3; - - wire mshr_push_stall; - wire cwbq_push_stall; - wire dwbq_push_stall; - wire snpq_push_stall; - wire pipeline_stall; - - // determine which queue to pop next in piority order - wire mshr_pop_unqual = mshr_valid_st0; - wire dfpq_pop_unqual = !mshr_pop_unqual && !dfpq_empty; - wire reqq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !reqq_empty && !mshr_almfull; - wire snrq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !reqq_pop_unqual && !snrq_empty && !mshr_almfull; - - assign mshr_pop = mshr_pop_unqual && !pipeline_stall - && !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss - assign dfpq_pop = dfpq_pop_unqual && !pipeline_stall; - assign reqq_pop = reqq_pop_unqual && !pipeline_stall; - assign snrq_pop = snrq_pop_unqual && !pipeline_stall; + reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; + wire mshr_pop; + wire mshr_valid_st0; + wire[`REQS_BITS-1:0] mshr_tid_st0; + wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] mshr_wsel_st0; + wire [`WORD_WIDTH-1:0] mshr_writeword_st0; + wire [`REQ_TAG_WIDTH-1:0] mshr_tag_st0; + wire mshr_rw_st0; + wire [WORD_SIZE-1:0] mshr_byteen_st0; + wire mshr_is_snp_st0; + wire mshr_snp_inv_st0; wire is_fill_st0; wire is_mshr_st0; @@ -325,7 +304,7 @@ module VX_bank #( wire dirty_st1; wire [WORD_SIZE-1:0] mem_byteen_st1; wire writeen_st1; - wire mem_rw_st1; + wire mem_rw_st1; `DEBUG_BEGIN wire [`REQ_TAG_WIDTH-1:0] tag_st1; wire [`REQS_BITS-1:0] tid_st1; @@ -337,7 +316,7 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] readword_st2; wire [`BANK_LINE_WIDTH-1:0] readdata_st2; wire [`BANK_LINE_WIDTH-1:0] writedata_st2; - wire [WORD_SIZE-1:0] mem_byteen_st2; + wire [WORD_SIZE-1:0] mem_byteen_st2; wire dirty_st2; wire [BANK_LINE_SIZE-1:0] dirtyb_st2; wire 
[`REQ_INST_META_WIDTH-1:0] inst_meta_st2; @@ -349,25 +328,65 @@ module VX_bank #( wire miss_st2; wire force_miss_st2; wire[`LINE_ADDR_WIDTH-1:0] addr_st2; - wire writeen_st2; + wire writeen_st2; + wire core_req_hit_st2; + wire valid_st3; + wire is_mshr_st3; wire miss_st3; wire force_miss_st3; wire [`LINE_ADDR_WIDTH-1:0] addr_st3; + wire core_req_hit_st1; + + wire mshr_push_stall; + wire cwbq_push_stall; + wire dwbq_push_stall; + wire snpq_push_stall; + wire pipeline_stall; + + wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); + wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); + + wire creq_commit = valid_st1 && core_req_hit_st1 && !pipeline_stall; + + wire mshr_going_full = (mshr_pending_size == MSHR_SIZE); + + // determine which queue to pop next in priority order + wire mshr_pop_unqual = mshr_valid_st0; + wire dfpq_pop_unqual = !mshr_pop_unqual && !dfpq_empty; + wire creq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_empty && !mshr_going_full; + wire snrq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_pop_unqual && !snrq_empty && !mshr_going_full; + + assign mshr_pop = mshr_pop_unqual && !pipeline_stall + && !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss + assign dfpq_pop = dfpq_pop_unqual && !pipeline_stall; + assign creq_pop = creq_pop_unqual && !pipeline_stall; + assign snrq_pop = snrq_pop_unqual && !pipeline_stall; + + // MSHR pending size + always @(posedge clk) begin + if (reset) begin + mshr_pending_size <= 0; + end else begin + mshr_pending_size <= mshr_pending_size + + ((creq_pop && !creq_commit) ? 1 : ((creq_commit && !creq_pop) ? -1 : 0)); + end + end + assign is_mshr_st0 = mshr_pop_unqual; assign is_fill_st0 = dfpq_pop_unqual; - assign valid_st0 = dfpq_pop || mshr_pop || reqq_pop || snrq_pop; + assign valid_st0 = dfpq_pop || mshr_pop || creq_pop || snrq_pop; assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : dfpq_pop_unqual ? 
dfpq_addr_st0 : - reqq_pop_unqual ? reqq_addr_st0[`LINE_SELECT_ADDR_RNG] : + creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; if (`WORD_SELECT_WIDTH != 0) begin - assign wsel_st0 = reqq_pop_unqual ? reqq_addr_st0[`WORD_SELECT_WIDTH-1:0] : + assign wsel_st0 = creq_pop_unqual ? creq_addr_st0[`WORD_SELECT_WIDTH-1:0] : mshr_pop_unqual ? mshr_wsel_st0 : 0; end else begin @@ -378,7 +397,7 @@ module VX_bank #( assign writedata_st0 = dfpq_filldata_st0; assign inst_meta_st0 = mshr_pop_unqual ? {`REQ_TAG_WIDTH'(mshr_tag_st0), mshr_rw_st0, mshr_byteen_st0, mshr_tid_st0} : - reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_tag_st0), reqq_rw_st0, reqq_byteen_st0, reqq_tid_st0} : + creq_pop_unqual ? {`REQ_TAG_WIDTH'(creq_tag_st0), creq_rw_st0, creq_byteen_st0, creq_tid_st0} : snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} : 0; @@ -391,7 +410,7 @@ module VX_bank #( 0; assign writeword_st0 = mshr_pop_unqual ? mshr_writeword_st0 : - reqq_pop_unqual ? reqq_writeword_st0 : + creq_pop_unqual ? 
creq_writeword_st0 : 0; `ifdef DBG_CACHE_REQ_INFO @@ -432,12 +451,10 @@ if (DRAM_ENABLE) begin assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; - // we have a matching previous request that missed alreedy - wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); - wire st3_pending_hazard_st1 = valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1); - // force miss to ensure commit order when a new request has pending previous requests to same block // also force a miss for msrq requests when previous requests got a miss + wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); + wire st3_pending_hazard_st1 = valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1); assign force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1 && (mshr_pending_hazard_st1 || st2_pending_hazard_st1 || st3_pending_hazard_st1)) || (valid_st1 && is_mshr_st1 && is_mshr_miss_st2); @@ -481,23 +498,26 @@ if (DRAM_ENABLE) begin .writeen_out (writeen_st1) ); + assign core_req_hit_st1 = !is_fill_st1 && !is_snp_st1 && !miss_st1 && !force_miss_st1; + assign misses = miss_st1; VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH), + .N(1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + WORD_SIZE + `REQ_INST_META_WIDTH), .R(1) ) pipe_reg1 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({valid_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, dirty_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), - .out ({valid_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, 
writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) + .in ({valid_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_snp_st1, snp_inv_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_byteen_st1, inst_meta_st1}), + .out ({valid_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_snp_st2, snp_inv_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_byteen_st2, inst_meta_st2}) ); end else begin `UNUSED_VAR (mshr_pending_hazard_unqual_st0) + `UNUSED_VAR (dram_rsp_fire) `UNUSED_VAR (addr_st0) assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; @@ -505,19 +525,19 @@ end else begin assign is_fill_st1 = is_fill_st0; assign is_mshr_st1 = is_mshr_st0; assign is_snp_st1 = is_snp_st0; - assign valid_st1 = valid_st0; + assign valid_st1 = valid_st0; assign wsel_st1 = wsel_st0; assign writeword_st1= writeword_st0; assign writedata_st1= writedata_st0; assign inst_meta_st1= inst_meta_st0; assign snp_inv_st1 = snp_inv_st0; - assign addr_st1 = reqq_addr_st0[`LINE_SELECT_ADDR_RNG]; + assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG]; assign dirty_st1 = 0; assign readtag_st1 = 0; assign miss_st1 = 0; assign writeen_st1 = valid_st1 && mem_rw_st1; assign force_miss_st1 = 0; - + assign is_fill_st2 = is_fill_st1; assign is_mshr_st2 = is_mshr_st1; assign is_snp_st2 = is_snp_st1; @@ -528,13 +548,19 @@ end else begin assign inst_meta_st2= inst_meta_st1; assign snp_inv_st2 = snp_inv_st1; assign addr_st2 = addr_st1; - assign mem_byteen_st2 = mem_byteen_st1; assign dirty_st2 = dirty_st1; + assign mem_byteen_st2 = mem_byteen_st1; assign readtag_st2 = readtag_st1; assign miss_st2 = miss_st1; assign writeen_st2 = writeen_st1; assign force_miss_st2 = force_miss_st1; + assign core_req_hit_st1 = 0; + assign core_req_hit_st2 = 0; + assign send_dwb_req_st2 = 0; + assign do_writeback_st2 = 0; + 
assign incoming_fill_st2 = 0; + assign misses = 0; end @@ -547,8 +573,6 @@ end end `endif - assign is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && (miss_st2 || force_miss_st2); - VX_data_access #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), @@ -587,7 +611,6 @@ end .dirtyb_out (dirtyb_st2) ); - wire valid_st3; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st3; wire [`WORD_WIDTH-1:0] writeword_st3; wire [`WORD_WIDTH-1:0] readword_st3; @@ -596,15 +619,11 @@ end wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st3; wire [`TAG_SELECT_BITS-1:0] readtag_st3; wire is_snp_st3; - wire snp_inv_st3; - wire is_mshr_st3; - wire send_core_rsp_st3; + wire snp_inv_st3; + wire core_req_hit_st3; wire send_dwb_req_st3; wire do_writeback_st3; - wire send_snp_rsp_st3; - wire incoming_fill_st3; - - wire send_core_rsp_st2 = !is_fill_st2 && !is_snp_st2 && !miss_st2 && !force_miss_st2; + wire incoming_fill_st3; // check if a matching fill request is comming wire incoming_fill_dfp_st2 = dram_rsp_fire && (addr_st2 == dram_rsp_addr); @@ -625,18 +644,16 @@ end wire send_dwb_req_st2 = send_fill_req_st2 || do_writeback_st2; - wire send_snp_rsp_st2 = is_snp_st2 && !force_miss_st2; - VX_generic_register #( - .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH), + .N(1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + BANK_LINE_SIZE + `REQ_INST_META_WIDTH), .R(1) ) pipe_reg2 ( .clk (clk), .reset (reset), .stall (pipeline_stall), .flush (1'b0), - .in ({valid_st2, send_core_rsp_st2, send_dwb_req_st2, do_writeback_st2, send_snp_rsp_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), - .out ({valid_st3, send_core_rsp_st3, send_dwb_req_st3, 
do_writeback_st3, send_snp_rsp_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) + .in ({valid_st2, core_req_hit_st2, send_dwb_req_st2, do_writeback_st2, incoming_fill_st2, force_miss_st2, is_mshr_st2, is_snp_st2, snp_inv_st2, addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirtyb_st2, inst_meta_st2}), + .out ({valid_st3, core_req_hit_st3, send_dwb_req_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, is_snp_st3, snp_inv_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3}) ); `ifdef DBG_CACHE_REQ_INFO @@ -647,8 +664,6 @@ end end `endif - assign is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); - // Enqueue to miss reserv if it's a valid miss wire[`REQS_BITS-1:0] req_tid_st3; @@ -657,17 +672,22 @@ end wire[WORD_SIZE-1:0] req_byteen_st3; wire mshr_push_unqual = valid_st3 && (miss_st3 || force_miss_st3); - assign mshr_push_stall = mshr_push_unqual && mshr_full; + assign mshr_push_stall = 0; wire mshr_push = mshr_push_unqual - && !mshr_full && !cwbq_push_stall && !dwbq_push_stall - && !snpq_push_stall; + && !snpq_push_stall; + + wire mshr_full; + always @(posedge clk) begin + assert(!mshr_push || !mshr_full); // mshr stall is detected before issuing new requests + end assign {req_tag_st3, req_rw_st3, req_byteen_st3, req_tid_st3} = inst_meta_st3; if (DRAM_ENABLE) begin + wire mshr_dequeue_st3 = valid_st3 && is_mshr_st3 && !mshr_push_unqual && !pipeline_stall; // mark msrq entry that match DRAM fill as 'ready' @@ -677,7 +697,7 @@ end // or the fill request is comming for the missed block wire mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_st3); - VX_cache_miss_resrv #( + VX_miss_resrv #( .BANK_ID (BANK_ID), .CACHE_ID (CACHE_ID), .CORE_TAG_ID_BITS 
(CORE_TAG_ID_BITS), @@ -688,7 +708,7 @@ end .MSHR_SIZE (MSHR_SIZE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .SNP_TAG_WIDTH (SNP_TAG_WIDTH) - ) cache_miss_resrv ( + ) miss_resrv ( .clk (clk), .reset (reset), @@ -717,7 +737,6 @@ end .enqueue_mshr_st3 (is_mshr_st3), .enqueue_ready_st3 (mshr_init_ready_state_st3), .enqueue_full (mshr_full), - .enqueue_almfull (mshr_almfull), // fill .update_ready_st0 (update_ready_st0), @@ -749,7 +768,6 @@ end `UNUSED_VAR (incoming_fill_st3) assign mshr_pending_hazard_unqual_st0 = 0; assign mshr_full = 0; - assign mshr_almfull = 0; assign mshr_valid_st0 = 0; assign mshr_addr_st0 = 0; assign mshr_wsel_st0 = 0; @@ -766,7 +784,7 @@ end wire cwbq_empty, cwbq_full; - wire cwbq_push_unqual = valid_st3 && send_core_rsp_st3 && !req_rw_st3; + wire cwbq_push_unqual = valid_st3 && core_req_hit_st3 && !req_rw_st3; assign cwbq_push_stall = cwbq_push_unqual && cwbq_full; wire cwbq_push = cwbq_push_unqual @@ -823,7 +841,8 @@ end if (DRAM_ENABLE) begin VX_generic_queue #( .DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH), - .SIZE(DREQ_SIZE) + .SIZE(DREQ_SIZE), + .BUFFERED(1) ) dwb_queue ( .clk (clk), .reset (reset), @@ -858,7 +877,7 @@ end wire snpq_empty, snpq_full; - wire snpq_push_unqual = valid_st3 && send_snp_rsp_st3; + wire snpq_push_unqual = valid_st3 && is_snp_st3 && !force_miss_st3; assign snpq_push_stall = snpq_push_unqual && snpq_full; @@ -875,7 +894,8 @@ end if (FLUSH_ENABLE) begin VX_generic_queue #( .DATAW (SNP_TAG_WIDTH), - .SIZE (SNPQ_SIZE) + .SIZE (SNPQ_SIZE), + .BUFFERED(1) ) snp_rsp_queue ( .clk (clk), .reset (reset), @@ -936,11 +956,11 @@ end if (dfpq_pop) begin $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); end - if (reqq_pop) begin - if (reqq_rw_st0) - $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), 
reqq_tag_st0, reqq_tid_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0); + if (creq_pop) begin + if (creq_rw_st0) + $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, creq_writeword_st0, debug_wid_st0, debug_pc_st0); else - $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0); + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, creq_writeword_st0, debug_wid_st0, debug_pc_st0); end if (snrq_pop) begin $display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_inv_st0); diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 65a601d1..7d28e8cb 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -24,7 +24,7 @@ module VX_cache_core_req_bank_sel #( always @(*) begin per_bank_valid_r = 0; - per_bank_ready_other = {NUM_BANKS{1'b1}}; + per_bank_ready_other = {NUM_BANKS{1'b1}}; per_bank_ready_ignore = {NUM_BANKS{1'b1}}; for (integer i = 0; i < NUM_BANKS; i++) begin diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v similarity index 96% rename from hw/rtl/cache/VX_cache_miss_resrv.v rename to hw/rtl/cache/VX_miss_resrv.v index b049fcf3..7ba5aa03 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -1,6 +1,6 @@ `include "VX_cache_config.vh" -module VX_cache_miss_resrv #( +module 
VX_miss_resrv #( parameter CACHE_ID = 0, parameter BANK_ID = 0, @@ -51,7 +51,6 @@ module VX_cache_miss_resrv #( input wire enqueue_mshr_st3, input wire enqueue_ready_st3, output wire enqueue_full, - output wire enqueue_almfull, // fill input wire update_ready_st0, @@ -72,9 +71,8 @@ module VX_cache_miss_resrv #( output wire dequeue_snp_inv_st0, input wire dequeue_st3 ); - localparam FULL_DISTANCE = 3; // need 3 cycles window to prevent pipeline lock - wire [`MSHR_METADATA_WIDTH-1:0] metadata_table; + `NO_RW_RAM_CHECK reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; reg [MSHR_SIZE-1:0] valid_table; @@ -82,13 +80,9 @@ module VX_cache_miss_resrv #( reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, restore_ptr; reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr; reg [`LOG2UP(MSHR_SIZE)-1:0] tail_ptr; - reg [`LOG2UP(MSHR_SIZE+1)-1:0] size; - `STATIC_ASSERT(MSHR_SIZE > FULL_DISTANCE, ("invalid size")) - assign enqueue_full = (size == $bits(size)'(MSHR_SIZE)); - assign enqueue_almfull = (size >= $bits(size)'(MSHR_SIZE-FULL_DISTANCE)); wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index b5b5e976..ad593fdd 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -85,7 +85,7 @@ module VX_tag_access #( ); // use "case equality" to handle uninitialized tag when block entry is not valid - wire tags_match = read_valid && (addrtag === read_tag); + wire tags_match = read_valid && (addrtag == read_tag); assign do_write = WRITE_ENABLE && valid_in diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 9bb216ba..0e85bf48 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -40,7 +40,6 @@ set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name VERILOG_MACRO FPU_FAST -set_global_assignment -name AUTO_SHIFT_REGISTER_RECOGNITION AUTO set_global_assignment 
-name MIN_CORE_JUNCTION_TEMP 0 set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"