From 0cd9bd689e2e70518f6b605085f7f3bc1a81ebda Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Sat, 23 May 2020 21:36:57 -0700 Subject: [PATCH] Added schedule_ptr to mrvq for speculative pops --- hw/rtl/cache/VX_bank.v | 127 ++++++++++++++++++++--------- hw/rtl/cache/VX_cache_miss_resrv.v | 39 ++++++--- 2 files changed, 113 insertions(+), 53 deletions(-) diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 47fff059..cd5a47b3 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -245,7 +245,7 @@ module VX_bank #( wire[`LINE_ADDR_WIDTH-1:0] addr_st2; wire is_fill_st2; - wire msrq_push_stall; + wire mrvq_push_stall; wire cwbq_push_stall; wire dwbq_push_stall; wire dram_fill_req_stall; @@ -268,10 +268,17 @@ module VX_bank #( end end - assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe; - assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe; - assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe; - assign snrq_pop = !mrvq_stop && !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && !snrq_empty && !stall_bank_pipe; + + wire mrvq_pop_unqual = mrvq_valid_st0; + wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty; + wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1[0] && !is_fill_in_pipe; + wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty; + + + assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2; + assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe; + assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe; + assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe; wire qual_is_fill_st0; wire qual_valid_st0; @@ -294,41 +301,41 @@ module VX_bank #( wire is_snp_st1 [STAGE_1_CYCLES-1:0]; wire from_mrvq_st1 [STAGE_1_CYCLES-1:0]; - assign qual_is_fill_st0 = dfpq_pop; + assign qual_is_fill_st0 = dfpq_pop_unqual; assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; - assign qual_addr_st0 = dfpq_pop ? dfpq_addr_st0 : - mrvq_pop ? mrvq_addr_st0 : - reqq_pop ? reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] : - snrq_pop ? snrq_addr_st0 : + assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 : + mrvq_pop_unqual ? mrvq_addr_st0 : + reqq_pop_unqual ? reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] : + snrq_pop_unqual ? snrq_addr_st0 : 0; - assign qual_wsel_st0 = reqq_pop ? reqq_req_addr_st0[`BASE_ADDR_BITS-1:0] : - mrvq_pop ? mrvq_wsel_st0 : + assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`BASE_ADDR_BITS-1:0] : + mrvq_pop_unqual ? mrvq_wsel_st0 : 0; - assign qual_writedata_st0 = dfpq_pop ? dfpq_filldata_st0 : 57; + assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57; - assign qual_inst_meta_st0 = mrvq_pop ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_mem_read_st0, mrvq_mem_write_st0, mrvq_tid_st0} : - reqq_pop ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_mem_read_st0, reqq_req_mem_write_st0, reqq_req_tid_st0} : - snrq_pop ? {`REQ_TAG_WIDTH'(snrq_tag_st0), `BYTE_EN_BITS'(0), `BYTE_EN_BITS'(0), `REQS_BITS'(0)} : + assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_mem_read_st0, mrvq_mem_write_st0, mrvq_tid_st0} : + reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_mem_read_st0, reqq_req_mem_write_st0, reqq_req_tid_st0} : + snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), `BYTE_EN_BITS'(0), `BYTE_EN_BITS'(0), `REQS_BITS'(0)} : 0; - assign qual_going_to_write_st0 = dfpq_pop ? 1 : - (mrvq_pop && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 : - (reqq_pop && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 : + assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 : + (mrvq_pop_unqual && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 : + (reqq_pop_unqual && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 : 0; - assign qual_is_snp_st0 = mrvq_pop ? mrvq_is_snp_st0 : - snrq_pop ? 1 : + assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 : + snrq_pop_unqual ? 1 : 0; - assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 : - reqq_pop ? reqq_req_writeword_st0 : + assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 : + reqq_pop_unqual ? reqq_req_writeword_st0 : 0; - assign qual_from_mrvq_st0 = mrvq_pop; + assign qual_from_mrvq_st0 = mrvq_pop_unqual; `DEBUG_BEGIN if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin @@ -377,14 +384,18 @@ module VX_bank #( wire snp_to_mrvq_st1e; wire mrvq_init_ready_state_st1e; wire miss_add_because_miss; + wire valid_st1e; + wire from_mrvq_st1e; - assign is_snp_st1e = is_snp_st1[STAGE_1_CYCLES-1]; + assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1]; + assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1]; + assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1]; assign {tag_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1]; assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2); - assign force_request_miss_st1e = (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e) && valid_st1[STAGE_1_CYCLES-1] && !from_mrvq_st1[STAGE_1_CYCLES-1]; + assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2); VX_tag_data_access #( .CACHE_SIZE (CACHE_SIZE), @@ -406,7 +417,7 @@ module VX_bank #( .readaddr_st10(addr_st1[0][`LINE_SELECT_BITS-1:0]), // Actual Read/Write - .valid_req_st1e(valid_st1[STAGE_1_CYCLES-1]), + .valid_req_st1e(valid_st1e), .writefill_st1e(is_fill_st1[STAGE_1_CYCLES-1]), .writeaddr_st1e(addr_st1[STAGE_1_CYCLES-1]), .writewsel_st1e(wsel_st1[STAGE_1_CYCLES-1]), @@ -435,7 +446,8 @@ module VX_bank #( end `DEBUG_END - wire qual_valid_st1e_2 = valid_st1[STAGE_1_CYCLES-1] && !is_fill_st1[STAGE_1_CYCLES-1]; + wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1]; + wire from_mrvq_st1e_st2 = from_mrvq_st1e && !is_snp_st1e; wire valid_st2; wire [`BASE_ADDR_BITS-1:0] wsel_st2; @@ -449,20 +461,22 @@ module VX_bank #( wire fill_saw_dirty_st2; wire is_snp_st2; wire snp_to_mrvq_st2; + wire from_mrvq_st2; wire mrvq_init_ready_state_st2; wire mrvq_init_ready_state_unqual_st2; wire mrvq_init_ready_state_hazard_st0_st1; wire mrvq_init_ready_state_hazard_st1e_st1; + wire recover_mrvq_state_st2; VX_generic_register #( - .N(1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH) + .N(1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH) ) st_1e_2 ( .clk (clk), .reset(reset), .stall(stall_bank_pipe), .flush(1'b0), - .in ({mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), - .out ({mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) + .in ({from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}), + .out ({from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 }) ); @@ -477,13 +491,17 @@ module VX_bank #( wire miss_add_because_pending = snp_to_mrvq_st2; wire miss_add_unqual = (miss_add_because_miss || miss_add_because_pending); - assign msrq_push_stall = miss_add_unqual && mrvq_full; + assign mrvq_push_stall = miss_add_unqual && mrvq_full; + wire miss_add = miss_add_unqual && !mrvq_full && !(cwbq_push_stall || dwbq_push_stall - || dram_fill_req_stall); + || dram_fill_req_stall); + + + assign recover_mrvq_state_st2 = miss_add && from_mrvq_st2; wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2; wire [`BASE_ADDR_BITS-1:0] miss_add_wsel = wsel_st2; @@ -491,9 +509,11 @@ module VX_bank #( assign {miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2; wire miss_add_is_snp = is_snp_st2; + wire miss_add_from_mrvq = valid_st2 && from_mrvq_st2 && !stall_bank_pipe; - assign mrvq_init_ready_state_hazard_st0_st1 = miss_add && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 ); - assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]); + + assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 ); + assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]); assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1; @@ -513,6 +533,7 @@ module VX_bank #( // Enqueue .miss_add (miss_add), + .from_mrvq (miss_add_from_mrvq), .miss_add_addr (miss_add_addr), .miss_add_wsel (miss_add_wsel), .miss_add_data (miss_add_data), @@ -557,7 +578,7 @@ module VX_bank #( && !cwbq_full && (miss_add_mem_write == `BYTE_EN_NO) && !(dwbq_push_stall - || msrq_push_stall + || mrvq_push_stall || dram_fill_req_stall); wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2; @@ -586,9 +607,35 @@ module VX_bank #( // Enqueue DRAM fill request - assign dram_fill_req_valid = miss_add && !mrvq_init_ready_state_st2; +// `IGNORE_WARNINGS_BEGIN +// wire invalidate_fill; +// `IGNORE_WARNINGS_END +// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2; +// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2; + +// VX_fill_invalidator #( +// .BANK_LINE_SIZE (BANK_LINE_SIZE), +// .NUM_BANKS (NUM_BANKS), +// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE) +// ) fill_invalidator ( +// .clk (clk), +// .reset (reset), +// .possible_fill (possible_fill), +// .success_fill (is_fill_st2), +// .fill_addr (fill_invalidator_addr), +// .invalidate_fill (invalidate_fill) +// ); + + wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || from_mrvq_st2); + + assign dram_fill_req_valid = dram_fill_req_unqual + && dram_fill_req_ready + && !( dwbq_push_stall + || mrvq_push_stall + || cwbq_push_stall); + assign dram_fill_req_addr = addr_st2; - assign dram_fill_req_stall = (valid_st2 && miss_st2 && ~dram_fill_req_ready); + assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready; // Enqueue DRAM writeback request @@ -609,7 +656,7 @@ module VX_bank #( assign dwbq_push = dwbq_push_unqual && !dwbq_full && !(cwbq_push_stall - || msrq_push_stall + || mrvq_push_stall || dram_fill_req_stall); wire [`BANK_LINE_WIDTH-1:0] dwbq_req_data = readdata_st2; @@ -658,7 +705,7 @@ module VX_bank #( // bank pipeline stall assign stall_bank_pipe = cwbq_push_stall || dwbq_push_stall - || msrq_push_stall + || mrvq_push_stall || dram_fill_req_stall; `ifdef DBG_PRINT_CACHE_BANK diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index fb1aaf8b..8c0c055e 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -23,6 +23,7 @@ module VX_cache_miss_resrv #( // Miss enqueue input wire miss_add, + input wire from_mrvq, input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr, input wire[`BASE_ADDR_BITS-1:0] miss_add_wsel, input wire[`WORD_WIDTH-1:0] miss_add_data, @@ -57,6 +58,7 @@ module VX_cache_miss_resrv #( reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MRVQ_SIZE-1:0] valid_table; reg [MRVQ_SIZE-1:0] ready_table; + reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr; @@ -86,16 +88,21 @@ module VX_cache_miss_resrv #( assign pending_hazard = |(valid_address_match); - wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr]; - wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr; + wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr]; + wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr; assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible; assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0} = metadata_table[dequeue_index]; - wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2); + wire mrvq_push = miss_add && enqueue_possible && !from_mrvq && (MRVQ_SIZE != 2); wire mrvq_pop = miss_resrv_pop && dequeue_possible; + + wire recover_state = miss_add && from_mrvq; + wire increment_head = !miss_add && from_mrvq; + + wire update_ready = (|make_ready); wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state; @@ -105,18 +112,25 @@ module VX_cache_miss_resrv #( always @(posedge clk) begin if (reset) begin - valid_table <= 0; - ready_table <= 0; - size <= 0; - head_ptr <= 0; - tail_ptr <= 0; + valid_table <= 0; + ready_table <= 0; + size <= 0; + schedule_ptr <= 0; + head_ptr <= 0; + tail_ptr <= 0; end else begin + if (mrvq_push) begin valid_table[enqueue_index] <= 1; ready_table[enqueue_index] <= mrvq_init_ready_state; addr_table[enqueue_index] <= miss_add_addr; metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_wsel, miss_add_is_snp}; tail_ptr <= tail_ptr + 1; + end else if (increment_head) begin + valid_table[head_ptr] <= 0; + head_ptr <= head_ptr + 1; + end else if (recover_state) begin + schedule_ptr <= schedule_ptr - 1; end // update entry as 'ready' during DRAM fill response @@ -125,16 +139,15 @@ module VX_cache_miss_resrv #( end if (mrvq_pop) begin - valid_table[dequeue_index] <= 0; ready_table[dequeue_index] <= 0; - head_ptr <= head_ptr + 1; + schedule_ptr <= schedule_ptr + 1; end - if (!(mrvq_push && mrvq_pop)) begin + if (!(mrvq_push && increment_head)) begin if (mrvq_push) begin size <= size + 1; end - if (mrvq_pop) begin + if (increment_head) begin size <= size - 1; end end @@ -149,7 +162,7 @@ module VX_cache_miss_resrv #( for (j = 0; j < MRVQ_SIZE; j++) begin if (valid_table[j]) begin $write(" "); - if (head_ptr == $bits(head_ptr)'(j)) $write("*"); + if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*"); if (~ready_table[j]) $write("!"); $write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID)); end